diff options
Diffstat (limited to 'fs/nfsd')
40 files changed, 7883 insertions, 4520 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 8df1ea4a6ff..f994e750e0d 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -65,8 +65,8 @@ config NFSD_V3_ACL If unsure, say N. config NFSD_V4 - bool "NFS server support for NFS version 4 (EXPERIMENTAL)" - depends on NFSD && PROC_FS && EXPERIMENTAL + bool "NFS server support for NFS version 4" + depends on NFSD && PROC_FS select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS @@ -81,6 +81,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers. + config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index 34e5c40af5e..a986ceb6fd0 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -35,25 +35,25 @@ #ifndef LINUX_NFS4_ACL_H #define LINUX_NFS4_ACL_H -#include <linux/posix_acl.h> +struct nfs4_acl; +struct svc_fh; +struct svc_rqst; -/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to - * fit in a page: */ -#define NFS4_ACL_MAX 170 +/* + * Maximum ACL we'll accept from a client; chosen (somewhat + * arbitrarily) so that kmalloc'ing the ACL shouldn't require a + * high-order allocation. This allows 204 ACEs on x86_64: + */ +#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ + / sizeof(struct nfs4_ace)) struct nfs4_acl *nfs4_acl_new(int); int nfs4_acl_get_whotype(char *, u32); -int nfs4_acl_write_who(int who, char *p); -int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, - uid_t who, u32 mask); - -#define NFS4_ACL_TYPE_DEFAULT 0x01 -#define NFS4_ACL_DIR 0x02 -#define NFS4_ACL_OWNER 0x04 +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); -struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, - struct posix_acl *, unsigned int flags); -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, - struct posix_acl **, unsigned int flags); +int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl); +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 79717a40dab..72f44823adb 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -10,7 +10,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) + if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) return f->flags; } return exp->ex_flags; @@ -24,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); - int ret; validate_process_creds(); @@ -46,9 +45,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if (!gi) goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { - if (!new->fsuid) + if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) new->fsuid = exp->ex_anon_uid; - if (!new->fsgid) + if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) new->fsgid = exp->ex_anon_gid; gi = groups_alloc(rqgi->ngroups); @@ -56,7 +55,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) goto oom; for (i = 0; i < rqgi->ngroups; i++) { - if (!GROUP_AT(rqgi, i)) + if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) GROUP_AT(gi, i) = exp->ex_anon_gid; else GROUP_AT(gi, i) = GROUP_AT(rqgi, i); @@ -65,17 +64,15 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi = get_group_info(rqgi); } - if (new->fsuid == (uid_t) -1) + if (uid_eq(new->fsuid, INVALID_UID)) new->fsuid = exp->ex_anon_uid; - if (new->fsgid == (gid_t) -1) + if (gid_eq(new->fsgid, INVALID_GID)) new->fsgid = exp->ex_anon_gid; - ret = set_groups(new, gi); + set_groups(new, gi); put_group_info(gi); - if (ret < 0) - goto error; - if (new->fsuid) + if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) new->cap_effective = cap_drop_nfsd_set(new->cap_effective); else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, @@ -87,9 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) return 0; oom: - ret = -ENOMEM; -error: abort_creds(new); - return ret; + return -ENOMEM; } diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h index 78b3c0e9382..53325a12ba6 100644 --- a/fs/nfsd/auth.h +++ b/fs/nfsd/auth.h @@ -1,6 +1,5 @@ /* * nfsd-specific authentication stuff. - * uid/gid mapping not yet implemented. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ @@ -8,11 +7,6 @@ #ifndef LINUX_NFSD_AUTH_H #define LINUX_NFSD_AUTH_H -#define nfsd_luid(rq, uid) ((u32)(uid)) -#define nfsd_lgid(rq, gid) ((u32)(gid)) -#define nfsd_ruid(rq, uid) ((u32)(uid)) -#define nfsd_rgid(rq, gid) ((u32)(gid)) - /* * Set the current process's fsuid/fsgid etc to those of the NFS * client user diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d34c45..b582f9ab6b2 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,11 +24,13 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -46,8 +52,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* @@ -67,17 +72,16 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); - -#ifdef CONFIG_NFSD_V4 -void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); -#else /* CONFIG_NFSD_V4 */ -static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) -{ -} -#endif /* CONFIG_NFSD_V4 */ +int nfsd_reply_cache_stats_open(struct inode *, struct file *); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h new file mode 100644 index 00000000000..4123551208d --- /dev/null +++ b/fs/nfsd/current_stateid.h @@ -0,0 +1,28 @@ +#ifndef _NFSD4_CURRENT_STATE_H +#define _NFSD4_CURRENT_STATE_H + +#include "state.h" +#include "xdr4.h" + +extern void clear_current_stateid(struct nfsd4_compound_state *cstate); +/* + * functions to set current state id + */ +extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *); +extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *); +extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); + +/* + * functions to consume current state id + */ +extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *); +extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *); +extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *); +extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); +extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *); +extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *); +extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *); + +#endif /* _NFSD4_CURRENT_STATE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cf8a6bd062f..13b85f94d9e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -15,17 +15,14 @@ #include <linux/namei.h> #include <linux/module.h> #include <linux/exportfs.h> - -#include <net/ipv6.h> +#include <linux/sunrpc/svc_xprt.h> #include "nfsd.h" #include "nfsfh.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT -typedef struct auth_domain svc_client; -typedef struct svc_export svc_export; - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -38,7 +35,6 @@ typedef struct svc_export svc_export; #define EXPKEY_HASHBITS 8 #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static struct cache_head *expkey_table[EXPKEY_HASHMAX]; static void expkey_put(struct kref *ref) { @@ -66,18 +62,13 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - -static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); -static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); -static struct cache_detail svc_expkey_cache; +static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, + struct svc_expkey *old); +static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { - /* client fsidtype fsid [path] */ + /* client fsidtype fsid expiry [path] */ char *buf; int len; struct auth_domain *dom = NULL; @@ -87,7 +78,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mlen < 1 || mesg[mlen-1] != '\n') + if (mesg[mlen - 1] != '\n') return -EINVAL; mesg[mlen-1] = 0; @@ -131,7 +122,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_fsidtype = fsidtype; memcpy(key.ek_fsid, buf, len); - ek = svc_expkey_lookup(&key); + ek = svc_expkey_lookup(cd, &key); err = -ENOMEM; if (!ek) goto out; @@ -145,7 +136,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) err = 0; if (len == 0) { set_bit(CACHE_NEGATIVE, &key.h.flags); - ek = svc_expkey_update(&key, ek); + ek = svc_expkey_update(cd, &key, ek); if (!ek) err = -ENOMEM; } else { @@ -155,7 +146,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) dprintk("Found the path %s\n", buf); - ek = svc_expkey_update(&key, ek); + ek = svc_expkey_update(cd, &key, ek); if (!ek) err = -ENOMEM; path_put(&key.ek_path); @@ -163,7 +154,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) cache_flush(); out: if (ek) - cache_put(&ek->h, &svc_expkey_cache); + cache_put(&ek->h, cd); if (dom) auth_domain_put(dom); kfree(buf); @@ -239,13 +230,12 @@ static struct cache_head *expkey_alloc(void) return NULL; } -static struct cache_detail svc_expkey_cache = { +static struct cache_detail svc_expkey_cache_template = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, - .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -268,13 +258,12 @@ svc_expkey_hash(struct svc_expkey *item) } static struct svc_expkey * -svc_expkey_lookup(struct svc_expkey *item) +svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *item) { struct cache_head *ch; int hash = svc_expkey_hash(item); - ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, - hash); + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct svc_expkey, h); else @@ -282,13 +271,13 @@ svc_expkey_lookup(struct svc_expkey *item) } static struct svc_expkey * -svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) +svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, + struct svc_expkey *old) { struct cache_head *ch; int hash = svc_expkey_hash(new); - ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, - &old->h, hash); + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct svc_expkey, h); else @@ -299,17 +288,21 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) -static struct cache_head *export_table[EXPORT_HASHMAX]; - static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) { + struct nfsd4_fs_location *locations = fsloc->locations; int i; + if (!locations) + return; + for (i = 0; i < fsloc->locations_count; i++) { - kfree(fsloc->locations[i].path); - kfree(fsloc->locations[i].hosts); + kfree(locations[i].path); + kfree(locations[i].hosts); } - kfree(fsloc->locations); + + kfree(locations); + fsloc->locations = NULL; } static void svc_export_put(struct kref *ref) @@ -318,6 +311,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -340,11 +334,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -400,8 +389,12 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; + /* more than one fsloc */ + if (fsloc->locations) + return -EINVAL; + /* listsize */ - err = get_int(mesg, &fsloc->locations_count); + err = get_uint(mesg, &fsloc->locations_count); if (err) return err; if (fsloc->locations_count > MAX_FS_LOCATIONS) @@ -449,17 +442,22 @@ out_free_all: static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { - int listsize, err; struct exp_flavor_info *f; + u32 listsize; + int err; + + /* more than one secinfo */ + if (exp->ex_nflavors) + return -EINVAL; - err = get_int(mesg, &listsize); + err = get_uint(mesg, &listsize); if (err) return err; - if (listsize < 0 || listsize > MAX_SECINFO_LIST) + if (listsize > MAX_SECINFO_LIST) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { - err = get_int(mesg, &f->pseudoflavor); + err = get_uint(mesg, &f->pseudoflavor); if (err) return err; /* @@ -468,7 +466,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) * problem at export time instead of when a client fails * to authenticate. */ - err = get_int(mesg, &f->flags); + err = get_uint(mesg, &f->flags); if (err) return err; /* Only some flags are allowed to differ between flavors: */ @@ -486,6 +484,27 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ + int len; + + /* more than one uuid */ + if (*puuid) + return -EINVAL; + + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != EX_UUID_LEN) + return -EINVAL; + + *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); + if (*puuid == NULL) + return -ENOMEM; + + return 0; +} + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -525,6 +544,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out1; exp.ex_client = dom; + exp.cd = cd; /* expiry */ err = -EINVAL; @@ -546,13 +566,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_uid= an_int; + exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; - exp.ex_anon_gid= an_int; + exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); /* fsid */ err = get_int(&mesg, &an_int); @@ -563,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - else if (strcmp(buf, "uuid") == 0) { - /* expect a 16 byte uuid encoded as \xXXXX... */ - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len != 16) - err = -EINVAL; - else { - exp.ex_uuid = - kmemdup(buf, 16, GFP_KERNEL); - if (exp.ex_uuid == NULL) - err = -ENOMEM; - } - } else if (strcmp(buf, "secinfo") == 0) + else if (strcmp(buf, "uuid") == 0) + err = uuid_parse(&mesg, buf, &exp.ex_uuid); + else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything @@ -590,6 +601,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_uuid); if (err) goto out4; + /* + * No point caching this if it would immediately expire. + * Also, this protects exportfs's dummy export from the + * anon_uid/anon_gid checks: + */ + if (exp.h.expiry_time < seconds_since_boot()) + goto out4; + /* + * For some reason exportfs has been passing down an + * invalid (-1) uid & gid on the "dummy" export which it + * uses to test export support. To make sure exportfs + * sees errors from check_export we therefore need to + * delay these checks till after check_export: + */ + err = -EINVAL; + if (!uid_valid(exp.ex_anon_uid)) + goto out4; + if (!gid_valid(exp.ex_anon_gid)) + goto out4; + err = 0; } expp = svc_export_lookup(&exp); @@ -615,7 +646,7 @@ out: } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs); static void show_secinfo(struct seq_file *m, struct svc_export *exp); static int svc_export_show(struct seq_file *m, @@ -640,7 +671,7 @@ static int svc_export_show(struct seq_file *m, if (exp->ex_uuid) { int i; seq_puts(m, ",uuid="); - for (i=0; i<16; i++) { + for (i = 0; i < EX_UUID_LEN; i++) { if ((i&3) == 0 && i) seq_putc(m, ':'); seq_printf(m, "%02x", exp->ex_uuid[i]); @@ -672,6 +703,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; + new->cd = item->cd; } static void export_update(struct cache_head *cnew, struct cache_head *citem) @@ -707,13 +740,12 @@ static struct cache_head *svc_export_alloc(void) return NULL; } -struct cache_detail svc_export_cache = { +static struct cache_detail svc_export_cache_template = { .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, - .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, @@ -739,8 +771,7 @@ svc_export_lookup(struct svc_export *exp) struct cache_head *ch; int hash = svc_export_hash(exp); - ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, - hash); + ch = sunrpc_cache_lookup(exp->cd, &exp->h, hash); if (ch) return container_of(ch, struct svc_export, h); else @@ -753,9 +784,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old) struct cache_head *ch; int hash = svc_export_hash(old); - ch = sunrpc_cache_update(&svc_export_cache, &new->h, - &old->h, - hash); + ch = sunrpc_cache_update(old->cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct svc_export, h); else @@ -764,7 +793,8 @@ svc_export_update(struct svc_export *new, struct svc_export *old) static struct svc_expkey * -exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, + u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; int err; @@ -776,18 +806,18 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - ek = svc_expkey_lookup(&key); + ek = svc_expkey_lookup(cd, &key); if (ek == NULL) return ERR_PTR(-ENOMEM); - err = cache_check(&svc_expkey_cache, &ek->h, reqp); + err = cache_check(cd, &ek->h, reqp); if (err) return ERR_PTR(err); return ek; } - -static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, - struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, + const struct path *path, struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -797,11 +827,12 @@ static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, key.ex_client = clp; key.ex_path = *path; + key.cd = cd; exp = svc_export_lookup(&key); if (exp == NULL) return ERR_PTR(-ENOMEM); - err = cache_check(&svc_export_cache, &exp->h, reqp); + err = cache_check(cd, &exp->h, reqp); if (err) return ERR_PTR(err); return exp; @@ -810,16 +841,17 @@ static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(svc_client *clp, struct path *path) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path) { struct dentry *saved = dget(path->dentry); - svc_export *exp = exp_get_by_name(clp, path, NULL); + struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL); while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { struct dentry *parent = dget_parent(path->dentry); dput(path->dentry); path->dentry = parent; - exp = exp_get_by_name(clp, path, NULL); + exp = exp_get_by_name(cd, clp, path, NULL); } dput(path->dentry); path->dentry = saved; @@ -834,13 +866,16 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) +exp_rootfh(struct net *net, struct auth_domain *clp, char *name, + struct knfsd_fh *f, int maxsize) { struct svc_export *exp; struct path path; struct inode *inode; struct svc_fh fh; int err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; err = -EPERM; /* NB: we probably ought to check that it's NUL-terminated */ @@ -853,7 +888,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, &path); + exp = exp_parent(cd, clp, &path); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -875,16 +910,18 @@ out: return err; } -static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, +static struct svc_export *exp_find(struct cache_detail *cd, + struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_export *exp; - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + struct nfsd_net *nn = net_generic(cd->net, nfsd_net_id); + struct svc_expkey *ek = exp_find_key(nn->svc_expkey_cache, clp, fsid_type, fsidv, reqp); if (IS_ERR(ek)) return ERR_CAST(ek); - exp = exp_get_by_name(clp, &ek->ek_path, reqp); - cache_put(&ek->h, &svc_expkey_cache); + exp = exp_get_by_name(cd, clp, &ek->ek_path, reqp); + cache_put(&ek->h, nn->svc_expkey_cache); if (IS_ERR(exp)) return ERR_CAST(exp); @@ -901,13 +938,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) return 0; /* ip-address based client; check sec= export option: */ for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) + if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) return 0; } /* defaults in absence of sec= options: */ if (exp->ex_nflavors == 0) { - if (rqstp->rq_flavor == RPC_AUTH_NULL || - rqstp->rq_flavor == RPC_AUTH_UNIX) + if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || + rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) return 0; } return nfserr_wrongsec; @@ -926,12 +963,14 @@ struct svc_export * rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); + exp = exp_get_by_name(cd, rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -943,7 +982,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); + gssexp = exp_get_by_name(cd, rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -955,12 +994,15 @@ struct svc_export * rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) goto gss; /* First try the auth_unix client: */ - exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle); + exp = exp_find(cd, rqstp->rq_client, fsid_type, + fsidv, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -972,7 +1014,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv, + gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; @@ -1029,13 +1071,15 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) /* Iterator */ static void *e_start(struct seq_file *m, loff_t *pos) - __acquires(svc_export_cache.hash_lock) + __acquires(((struct cache_detail *)m->private)->hash_lock) { loff_t n = *pos; unsigned hash, export; struct cache_head *ch; - - read_lock(&svc_export_cache.hash_lock); + struct cache_detail *cd = m->private; + struct cache_head **export_table = cd->hash_table; + + read_lock(&cd->hash_lock); if (!n--) return SEQ_START_TOKEN; hash = n >> 32; @@ -1060,6 +1104,8 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); + struct cache_detail *cd = m->private; + struct cache_head **export_table = cd->hash_table; if (p == SEQ_START_TOKEN) hash = 0; @@ -1082,9 +1128,11 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) } static void e_stop(struct seq_file *m, void *p) - __releases(svc_export_cache.hash_lock) + __releases(((struct cache_detail *)m->private)->hash_lock) { - read_unlock(&svc_export_cache.hash_lock); + struct cache_detail *cd = m->private; + + read_unlock(&cd->hash_lock); } static struct flags { @@ -1165,15 +1213,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp) } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc) { show_expflags(m, flag, NFSEXP_ALLFLAGS); if (flag & NFSEXP_FSID) seq_printf(m, ",fsid=%d", fsid); - if (anonu != (uid_t)-2 && anonu != (0x10000-2)) - seq_printf(m, ",anonuid=%u", anonu); - if (anong != (gid_t)-2 && anong != (0x10000-2)) - seq_printf(m, ",anongid=%u", anong); + if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) && + !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu)); + if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) && + !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong)); if (fsloc && fsloc->locations_count > 0) { char *loctype = (fsloc->migrated) ? "refer" : "replicas"; int i; @@ -1195,6 +1245,7 @@ static int e_show(struct seq_file *m, void *p) { struct cache_head *cp = p; struct svc_export *exp = container_of(cp, struct svc_export, h); + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) { seq_puts(m, "# Version 1.1\n"); @@ -1203,10 +1254,10 @@ static int e_show(struct seq_file *m, void *p) } cache_get(&exp->h); - if (cache_check(&svc_export_cache, &exp->h, NULL)) + if (cache_check(cd, &exp->h, NULL)) return 0; - cache_put(&exp->h, &svc_export_cache); - return svc_export_show(m, &svc_export_cache, cp); + exp_put(exp); + return svc_export_show(m, cd, cp); } const struct seq_operations nfs_exports_op = { @@ -1216,48 +1267,70 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; - /* * Initialize the exports module. */ int -nfsd_export_init(void) +nfsd_export_init(struct net *net) { int rv; - dprintk("nfsd: initializing export module.\n"); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + dprintk("nfsd: initializing export module (net: %p).\n", net); - rv = cache_register_net(&svc_export_cache, &init_net); + nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net); + if (IS_ERR(nn->svc_export_cache)) + return PTR_ERR(nn->svc_export_cache); + rv = cache_register_net(nn->svc_export_cache, net); if (rv) - return rv; - rv = cache_register_net(&svc_expkey_cache, &init_net); + goto destroy_export_cache; + + nn->svc_expkey_cache = cache_create_net(&svc_expkey_cache_template, net); + if (IS_ERR(nn->svc_expkey_cache)) { + rv = PTR_ERR(nn->svc_expkey_cache); + goto unregister_export_cache; + } + rv = cache_register_net(nn->svc_expkey_cache, net); if (rv) - cache_unregister_net(&svc_export_cache, &init_net); - return rv; + goto destroy_expkey_cache; + return 0; +destroy_expkey_cache: + cache_destroy_net(nn->svc_expkey_cache, net); +unregister_export_cache: + cache_unregister_net(nn->svc_export_cache, net); +destroy_export_cache: + cache_destroy_net(nn->svc_export_cache, net); + return rv; } /* * Flush exports table - called when last nfsd thread is killed */ void -nfsd_export_flush(void) +nfsd_export_flush(struct net *net) { - cache_purge(&svc_expkey_cache); - cache_purge(&svc_export_cache); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + cache_purge(nn->svc_expkey_cache); + cache_purge(nn->svc_export_cache); } /* * Shutdown the exports module. */ void -nfsd_export_shutdown(void) +nfsd_export_shutdown(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: shutting down export module.\n"); + dprintk("nfsd: shutting down export module (net: %p).\n", net); - cache_unregister_net(&svc_expkey_cache, &init_net); - cache_unregister_net(&svc_export_cache, &init_net); - svcauth_unix_purge(); + cache_unregister_net(nn->svc_expkey_cache, net); + cache_unregister_net(nn->svc_export_cache, net); + cache_destroy_net(nn->svc_expkey_cache, net); + cache_destroy_net(nn->svc_export_cache, net); + svcauth_unix_purge(net); - dprintk("nfsd: export shutdown complete.\n"); + dprintk("nfsd: export shutdown complete (net: %p).\n", net); } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 00000000000..cfeea85c5be --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include <linux/sunrpc/cache.h> +#include <uapi/linux/nfsd/export.h> + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS 128 + +struct nfsd4_fs_location { + char *hosts; /* colon separated list of hosts */ + char *path; /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { + uint32_t locations_count; + struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ + int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 +#define EX_UUID_LEN 16 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + +struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; + int ex_fsid; + unsigned char * ex_uuid; /* 16 byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + uint32_t ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client. There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { + struct cache_head h; + + struct auth_domain * ek_client; + int ek_fsidtype; + u32 ek_fsid[6]; + + struct path ek_path; +}; + +#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); +void nfsd_export_flush(struct net *); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct path *); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct path *); +struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); +int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ + cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ + cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index ce7f0758d84..2ed05c3cd43 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -8,60 +8,133 @@ #include <linux/fs.h> #include <linux/debugfs.h> #include <linux/module.h> +#include <linux/nsproxy.h> +#include <linux/sunrpc/addr.h> +#include <asm/uaccess.h> #include "state.h" -#include "fault_inject.h" +#include "netns.h" struct nfsd_fault_inject_op { char *file; - void (*func)(u64); + u64 (*forget)(struct nfs4_client *, u64); + u64 (*print)(struct nfs4_client *, u64); }; static struct nfsd_fault_inject_op inject_ops[] = { { .file = "forget_clients", - .func = nfsd_forget_clients, + .forget = nfsd_forget_client, + .print = nfsd_print_client, }, { .file = "forget_locks", - .func = nfsd_forget_locks, + .forget = nfsd_forget_client_locks, + .print = nfsd_print_client_locks, }, { .file = "forget_openowners", - .func = nfsd_forget_openowners, + .forget = nfsd_forget_client_openowners, + .print = nfsd_print_client_openowners, }, { .file = "forget_delegations", - .func = nfsd_forget_delegations, + .forget = nfsd_forget_client_delegations, + .print = nfsd_print_client_delegations, }, { .file = "recall_delegations", - .func = nfsd_recall_delegations, + .forget = nfsd_recall_client_delegations, + .print = nfsd_print_client_delegations, }, }; static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static int nfsd_inject_set(void *op_ptr, u64 val) +static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) { - struct nfsd_fault_inject_op *op = op_ptr; + u64 count = 0; if (val == 0) printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); else printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); - op->func(val); - return 0; + nfs4_lock_state(); + count = nfsd_for_n_state(val, op->forget); + nfs4_unlock_state(); + printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); } -static int nfsd_inject_get(void *data, u64 *val) +static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, + size_t addr_size) { - return 0; + char buf[INET6_ADDRSTRLEN]; + struct nfs4_client *clp; + u64 count; + + nfs4_lock_state(); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + count = op->forget(clp, 0); + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); + } + nfs4_unlock_state(); +} + +static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) +{ + nfs4_lock_state(); + *val = nfsd_for_n_state(0, op->print); + nfs4_unlock_state(); } -DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n"); +static ssize_t fault_inject_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + static u64 val; + char read_buf[25]; + size_t size; + loff_t pos = *ppos; + + if (!pos) + nfsd_inject_get(file_inode(file)->i_private, &val); + size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); + + return simple_read_from_buffer(buf, len, ppos, read_buf, size); +} + +static ssize_t fault_inject_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + char write_buf[INET6_ADDRSTRLEN]; + size_t size = min(sizeof(write_buf) - 1, len); + struct net *net = current->nsproxy->net_ns; + struct sockaddr_storage sa; + u64 val; + + if (copy_from_user(write_buf, buf, size)) + return -EFAULT; + write_buf[size] = '\0'; + + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); + if (size > 0) + nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); + else { + val = simple_strtoll(write_buf, NULL, 0); + nfsd_inject_set(file_inode(file)->i_private, val); + } + return len; /* on success, claim we got the whole input */ +} + +static const struct file_operations fops_nfsd = { + .owner = THIS_MODULE, + .read = fault_inject_read, + .write = fault_inject_write, +}; void nfsd_fault_inject_cleanup(void) { @@ -72,7 +145,7 @@ int nfsd_fault_inject_init(void) { unsigned int i; struct nfsd_fault_inject_op *op; - mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; debug_dir = debugfs_create_dir("nfsd", NULL); if (!debug_dir) diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h deleted file mode 100644 index 90bd0570956..00000000000 --- a/fs/nfsd/fault_inject.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> - * - * Function definitions for fault injection - */ - -#ifndef LINUX_NFSD_FAULT_INJECT_H -#define LINUX_NFSD_FAULT_INJECT_H - -#ifdef CONFIG_NFSD_FAULT_INJECTION -int nfsd_fault_inject_init(void); -void nfsd_fault_inject_cleanup(void); -void nfsd_forget_clients(u64); -void nfsd_forget_locks(u64); -void nfsd_forget_openowners(u64); -void nfsd_forget_delegations(u64); -void nfsd_recall_delegations(u64); -#else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline int nfsd_fault_inject_init(void) { return 0; } -static inline void nfsd_fault_inject_cleanup(void) {} -static inline void nfsd_forget_clients(u64 num) {} -static inline void nfsd_forget_locks(u64 num) {} -static inline void nfsd_forget_openowners(u64 num) {} -static inline void nfsd_forget_delegations(u64 num) {} -static inline void nfsd_recall_delegations(u64 num) {} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - -#endif /* LINUX_NFSD_FAULT_INJECT_H */ diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h index 2f3be132153..a3f34900091 100644 --- a/fs/nfsd/idmap.h +++ b/fs/nfsd/idmap.h @@ -42,21 +42,21 @@ #define IDMAP_NAMESZ 128 #ifdef CONFIG_NFSD_V4 -int nfsd_idmap_init(void); -void nfsd_idmap_shutdown(void); +int nfsd_idmap_init(struct net *); +void nfsd_idmap_shutdown(struct net *); #else -static inline int nfsd_idmap_init(void) +static inline int nfsd_idmap_init(struct net *net) { return 0; } -static inline void nfsd_idmap_shutdown(void) +static inline void nfsd_idmap_shutdown(struct net *net) { } #endif -__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); -__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); -int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); -int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); +__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); +__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t); #endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h new file mode 100644 index 00000000000..d32b3aa6600 --- /dev/null +++ b/fs/nfsd/netns.h @@ -0,0 +1,112 @@ +/* + * per net namespace data structures for nfsd + * + * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __NFSD_NETNS_H__ +#define __NFSD_NETNS_H__ + +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) + +#define SESSION_HASH_SIZE 512 + +struct cld_net; +struct nfsd4_client_tracking_ops; + +struct nfsd_net { + struct cld_net *cld_net; + + struct cache_detail *svc_expkey_cache; + struct cache_detail *svc_export_cache; + + struct cache_detail *idtoname_cache; + struct cache_detail *nametoid_cache; + + struct lock_manager nfsd4_manager; + bool grace_ended; + time_t boot_time; + + /* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_name_tree hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_name_tree hold unconfirmed + * setclientid info. + */ + struct list_head *reclaim_str_hashtbl; + int reclaim_str_hashtbl_size; + struct list_head *conf_id_hashtbl; + struct rb_root conf_name_tree; + struct list_head *unconf_id_hashtbl; + struct rb_root unconf_name_tree; + struct list_head *ownerstr_hashtbl; + struct list_head *lockowner_ino_hashtbl; + struct list_head *sessionid_hashtbl; + /* + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. + * + * All of the above fields are protected by the client_mutex. + */ + struct list_head client_lru; + struct list_head close_lru; + struct list_head del_recall_lru; + + struct delayed_work laundromat_work; + + /* client_lock protects the client lru list and session hash table */ + spinlock_t client_lock; + + struct file *rec_file; + bool in_grace; + struct nfsd4_client_tracking_ops *client_tracking_ops; + + time_t nfsd4_lease; + time_t nfsd4_grace; + + bool nfsd_net_up; + bool lockd_up; + + /* + * Time of server startup + */ + struct timeval nfssvc_boot; + + struct svc_serv *nfsd_serv; +}; + +/* Simple check to find out if a given net was properly initialized */ +#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) + +extern int nfsd_net_id; +#endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 6aa5590c367..12b023a7ab7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -30,8 +30,9 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); @@ -41,26 +42,24 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; + nfserr = fh_getattr(fh, &resp->stat); + if (nfserr) + goto fail; + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -68,17 +67,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -99,28 +91,51 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + if (error) + goto out_drop_write; + + fh_drop_write(fh); + + nfserr = fh_getattr(fh, &resp->stat); + +out: /* argp->acl_{access,default} may have been allocated in nfssvc_decode_setaclargs. */ posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); + goto out; } /* @@ -129,10 +144,15 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) { + __be32 nfserr; dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); + return nfserr; } /* @@ -150,6 +170,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); return nfserr; } @@ -159,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p); p++; @@ -174,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p++); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -195,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -203,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->access = ntohl(*p++); @@ -218,8 +245,7 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, * There must be an encoding function for void results so svc_process * will work properly. */ -int -nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) +static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } @@ -244,7 +270,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, return 0; inode = dentry->d_inode; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->mask); if (!xdr_ressize_check(rqstp, p)) return 0; @@ -254,7 +280,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } @@ -275,7 +301,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_attrstat *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -283,7 +309,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessres *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->access); return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index a596e9d987e..2a514e21dc7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -29,8 +29,9 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); @@ -38,26 +39,20 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -65,17 +60,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -96,21 +84,37 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd3_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; fh = fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); +out: /* argp->acl_{access,default} may have been allocated in nfs3svc_decode_setaclargs. */ posix_acl_release(argp->acl_access); @@ -124,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p); p++; @@ -139,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p++); if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -184,7 +190,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9095f3c21df..40128991313 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -43,7 +43,6 @@ static __be32 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int err; __be32 nfserr; dprintk("nfsd: GETATTR(3) %s\n", @@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, if (nfserr) RETURN_STATUS(nfserr); - err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, &resp->stat); - nfserr = nfserrno(err); + nfserr = fh_getattr(&resp->fh, &resp->stat); RETURN_STATUS(nfserr); } @@ -247,7 +244,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, /* Now create the file and set attributes */ nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, - argp->createmode, argp->verf, NULL, NULL); + argp->createmode, (u32 *)argp->verf, NULL, NULL); RETURN_STATUS(nfserr); } @@ -460,7 +457,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, __be32 nfserr; int count = 0; loff_t offset; - int i; + struct page **p; caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", @@ -484,8 +481,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - for (i=1; i<rqstp->rq_resused ; i++) { - page_addr = page_address(rqstp->rq_respages[i]); + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); if (((caddr_t)resp->buffer >= page_addr) && ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 08c6e36ab2e..e6c01e80325 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -7,8 +7,11 @@ */ #include <linux/namei.h> +#include <linux/sunrpc/svc_xprt.h> #include "xdr3.h" #include "auth.h" +#include "netns.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -103,12 +106,14 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_mode = ntohl(*p++); } if (*p++) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++)); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if (*p++) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++)); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if (*p++) { u64 newsize; @@ -163,10 +168,10 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { @@ -202,10 +207,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode) { - int err; + __be32 err; struct kstat stat; - err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); + err = fh_getattr(fhp, &stat); if (!err) { *p++ = xdr_one; /* attributes follow */ lease_get_mtime(dentry->d_inode, &stat.mtime); @@ -252,13 +257,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) */ void fill_post_wcc(struct svc_fh *fhp) { - int err; + __be32 err; if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); - err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, - &fhp->fh_post_attr); + err = fh_getattr(fhp, &fhp->fh_post_attr); fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) { fhp->fh_post_saved = 0; @@ -274,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp) int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -283,7 +288,8 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = decode_sattr3(p, &args->attrs); @@ -311,7 +317,8 @@ int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->access = ntohl(*p++); @@ -323,10 +330,11 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readargs *args) { unsigned int len; - int v,pn; + int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -338,8 +346,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, /* set up the kvec */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -355,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -461,8 +471,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, len = ntohl(*p++); if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) return 0; - args->tname = new = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->tname = new = page_address(*(rqstp->rq_next_page++)); args->tlen = len; /* first copy and check from the first page */ old = (char*)p; @@ -531,10 +540,10 @@ int nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -555,7 +564,8 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -565,8 +575,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -575,10 +584,11 @@ int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - int len, pn; + int len; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -590,9 +600,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->count = len; while (len > 0) { - pn = rqstp->rq_resused++; + struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) - args->buffer = page_address(rqstp->rq_respages[pn]); + args->buffer = page_address(p); len -= PAGE_SIZE; } @@ -603,7 +613,8 @@ int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); @@ -720,12 +731,14 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } @@ -803,13 +816,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, return p; } -static int +static __be32 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, const char *name, int namlen) { struct svc_export *exp; struct dentry *dparent, *dchild; - int rv = 0; + __be32 rv = nfserr_noent; dparent = cd->fh.fh_dentry; exp = cd->fh.fh_export; @@ -817,26 +830,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - if (dchild == dparent) { - /* filesystem root - cannot return filehandle for ".." */ - dput(dchild); - return -ENOENT; - } + /* filesystem root - cannot return filehandle for ".." */ + if (dchild == dparent) + goto out; } else dchild = dget(dparent); } else dchild = lookup_one_len(name, dparent, namlen); if (IS_ERR(dchild)) - return -ENOENT; - rv = -ENOENT; + return rv; if (d_mountpoint(dchild)) goto out; - rv = fh_compose(fhp, exp, dchild, &cd->fh); - if (rv) - goto out; if (!dchild->d_inode) goto out; - rv = 0; + rv = fh_compose(fhp, exp, dchild, &cd->fh); out: dput(dchild); return rv; @@ -844,21 +851,21 @@ out: static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { - struct svc_fh fh; - int err; + struct svc_fh *fh = &cd->scratch; + __be32 err; - fh_init(&fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, &fh, name, namlen); + fh_init(fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, fh, name, namlen); if (err) { *p++ = 0; *p++ = 0; goto out; } - p = encode_post_op_attr(cd->rqstp, p, &fh); + p = encode_post_op_attr(cd->rqstp, p, fh); *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, &fh); + p = encode_fh(p, fh); out: - fh_put(&fh); + fh_put(fh); return p; } @@ -882,7 +889,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, common); __be32 *p = cd->buffer; caddr_t curr_page_addr = NULL; - int pn; /* current page number */ + struct page ** page; int slen; /* string (name) length */ int elen; /* estimated entry length in words */ int num_entry_words = 0; /* actual number of words */ @@ -919,8 +926,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } /* determine which page in rq_respages[] we are currently filling */ - for (pn=1; pn < cd->rqstp->rq_resused; pn++) { - curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + for (page = cd->rqstp->rq_respages + 1; + page < cd->rqstp->rq_next_page; page++) { + curr_page_addr = page_address(*page); if (((caddr_t)cd->buffer >= curr_page_addr) && ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) @@ -935,14 +943,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, if (plus) p = encode_entryplus_baggage(cd, p, name, namlen); num_entry_words = p - cd->buffer; - } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + } else if (*(page+1) != NULL) { /* temporarily encode entry into next page, then move back to * current and next page in rq_respages[] */ __be32 *p1, *tmp; int len1, len2; /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + p1 = tmp = page_address(*(page+1)); p1 = encode_entry_baggage(cd, p1, name, namlen, ino); @@ -1088,11 +1096,13 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 9c51aff02ae..d714156a19f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,9 +36,14 @@ #include <linux/slab.h> #include <linux/nfs_fs.h> -#include <linux/export.h> +#include "nfsfh.h" +#include "nfsd.h" #include "acl.h" +#include "vfs.h" +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 /* mode bit translations: */ #define NFS4_READ_MODE (NFS4_ACE_READ_DATA) @@ -130,36 +135,47 @@ static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -struct nfs4_acl * -nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, - unsigned int flags) +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl) { - struct nfs4_acl *acl; + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; int size = 0; - if (pacl) { - if (posix_acl_valid(pacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*pacl->a_count; + pacl = get_acl(inode, ACL_TYPE_ACCESS); + if (!pacl) { + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) + return PTR_ERR(pacl); } - if (dpacl) { - if (posix_acl_valid(dpacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*dpacl->a_count; + /* allocate for worst case: one (deny, allow) pair each: */ + size += 2 * pacl->a_count; + + if (S_ISDIR(inode->i_mode)) { + flags = NFS4_ACL_DIR; + dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (dpacl) + size += 2 * dpacl->a_count; } - /* Allocate for worst case: one (deny, allow) pair each: */ - acl = nfs4_acl_new(size); - if (acl == NULL) - return ERR_PTR(-ENOMEM); + *acl = nfs4_acl_new(size); + if (*acl == NULL) { + error = -ENOMEM; + goto out; + } - if (pacl) - _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); if (dpacl) - _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - return acl; + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; } struct posix_acl_summary { @@ -264,7 +280,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->flag = eflag; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; } @@ -273,7 +289,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; pa++; @@ -300,7 +316,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; pa++; @@ -329,7 +345,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; } @@ -345,6 +361,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, acl->naces++; } +static bool +pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2) +{ + if (pace1->e_tag != pace2->e_tag) + return pace1->e_tag > pace2->e_tag; + if (pace1->e_tag == ACL_USER) + return uid_gt(pace1->e_uid, pace2->e_uid); + if (pace1->e_tag == ACL_GROUP) + return gid_gt(pace1->e_gid, pace2->e_gid); + return false; +} + static void sort_pacl_range(struct posix_acl *pacl, int start, int end) { int sorted = 0, i; @@ -355,8 +383,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) { while (!sorted) { sorted = 1; for (i = start; i < end; i++) { - if (pacl->a_entries[i].e_id - > pacl->a_entries[i+1].e_id) { + if (pace_gt(&pacl->a_entries[i], + &pacl->a_entries[i+1])) { sorted = 0; tmp = pacl->a_entries[i]; pacl->a_entries[i] = pacl->a_entries[i+1]; @@ -373,8 +401,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -398,7 +428,10 @@ struct posix_ace_state { }; struct posix_user_ace_state { - uid_t uid; + union { + kuid_t uid; + kgid_t gid; + }; struct posix_ace_state perms; }; @@ -498,19 +531,21 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all * permissions zero. */ - nace = 4 + state->users->n + state->groups->n; + if (!state->users->n && !state->groups->n) + nace = 3; + else /* Note we also include a MASK ACE in this case: */ + nace = 4 + state->users->n + state->groups->n; pacl = posix_acl_alloc(nace, GFP_KERNEL); if (!pacl) return ERR_PTR(-ENOMEM); @@ -521,7 +556,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; for (i=0; i < state->users->n; i++) { pace++; @@ -531,7 +565,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->users->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->users->aces[i].uid; + pace->e_uid = state->users->aces[i].uid; add_to_mask(state, &state->users->aces[i].perms); } @@ -541,7 +575,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; add_to_mask(state, &state->group); for (i=0; i < state->groups->n; i++) { @@ -552,14 +585,15 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->groups->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->groups->aces[i].uid; + pace->e_gid = state->groups->aces[i].gid; add_to_mask(state, &state->groups->aces[i].perms); } - pace++; - pace->e_tag = ACL_MASK; - low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; + if (state->users->n || state->groups->n) { + pace++; + pace->e_tag = ACL_MASK; + low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); + } pace++; pace->e_tag = ACL_OTHER; @@ -567,7 +601,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; return pacl; out_err: @@ -587,12 +620,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask) astate->deny |= mask & ~astate->allow; } -static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) +static int find_uid(struct posix_acl_state *state, kuid_t uid) { + struct posix_ace_state_array *a = state->users; int i; for (i = 0; i < a->n; i++) - if (a->aces[i].uid == uid) + if (uid_eq(a->aces[i].uid, uid)) return i; /* Not found: */ a->n++; @@ -603,6 +637,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array return i; } +static int find_gid(struct posix_acl_state *state, kgid_t gid) +{ + struct posix_ace_state_array *a = state->groups; + int i; + + for (i = 0; i < a->n; i++) + if (gid_eq(a->aces[i].gid, gid)) + return i; + /* Not found: */ + a->n++; + a->aces[i].gid = gid; + a->aces[i].perms.allow = state->everyone.allow; + a->aces[i].perms.deny = state->everyone.deny; + + return i; +} + static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) { int i; @@ -636,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_USER: - i = find_uid(state, state->users, ace->who); + i = find_uid(state, ace->who_uid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->users->aces[i].perms, mask); } else { @@ -658,7 +709,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_GROUP: - i = find_uid(state, state->groups, ace->who); + i = find_gid(state, ace->who_gid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->groups->aces[i].perms, mask); } else { @@ -690,8 +741,9 @@ static void process_one_v4_ace(struct posix_acl_state *state, } } -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, - struct posix_acl **dpacl, unsigned int flags) +static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, + struct posix_acl **pacl, struct posix_acl **dpacl, + unsigned int flags) { struct posix_acl_state effective_acl_state, default_acl_state; struct nfs4_ace *ace; @@ -751,6 +803,57 @@ out_estate: return ret; } +__be32 +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + __be32 error; + int host_error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) + return nfserr_attrnotsupp; + + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; + + host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (host_error == -EINVAL) + return nfserr_attrnotsupp; + if (host_error < 0) + goto out_nfserr; + + host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + if (host_error < 0) + goto out_release; + + if (S_ISDIR(inode->i_mode)) { + host_error = inode->i_op->set_acl(inode, dpacl, + ACL_TYPE_DEFAULT); + } + +out_release: + posix_acl_release(pacl); + posix_acl_release(dpacl); +out_nfserr: + if (host_error == -EOPNOTSUPP) + return nfserr_attrnotsupp; + else + return nfserrno(host_error); +} + + static short ace2type(struct nfs4_ace *ace) { @@ -769,9 +872,6 @@ ace2type(struct nfs4_ace *ace) return -1; } -EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); -EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); - struct nfs4_acl * nfs4_acl_new(int n) { @@ -819,21 +919,21 @@ nfs4_acl_get_whotype(char *p, u32 len) return NFS4_ACL_WHO_NAMED; } -int -nfs4_acl_write_who(int who, char *p) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) { + __be32 *p; int i; for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { - if (s2t_map[i].type == who) { - memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); - return s2t_map[i].stringlen; - } + if (s2t_map[i].type != who) + continue; + p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, s2t_map[i].string, + s2t_map[i].stringlen); + return 0; } - BUG(); + WARN_ON_ONCE(1); return -1; } - -EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_get_whotype); -EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6f3ebb48b12..2c73cae9899 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,10 +32,13 @@ */ #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/slab.h> #include "nfsd.h" #include "state.h" +#include "netns.h" +#include "xdr4cb.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -52,30 +55,6 @@ enum { NFSPROC4_CLNT_CB_SEQUENCE, }; -#define NFS4_MAXTAGLEN 20 - -#define NFS4_enc_cb_null_sz 0 -#define NFS4_dec_cb_null_sz 0 -#define cb_compound_enc_hdr_sz 4 -#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) -#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) -#define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) -#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) - -#define op_enc_sz 1 -#define op_dec_sz 2 -#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) -#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) -#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + enc_stateid_sz + \ - enc_nfs4_fh_sz) - -#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + \ - op_dec_sz) - struct nfs4_cb_compound_hdr { /* args */ u32 ident; /* minorversion 0 only */ @@ -605,57 +584,103 @@ static struct rpc_version nfs_cb_version4 = { .procs = nfs4_cb_procedures }; -static struct rpc_version *nfs_cb_version[] = { +static const struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -static struct rpc_program cb_program; +static const struct rpc_program cb_program; static struct rpc_stat cb_stats = { .program = &cb_program }; #define NFS4_CALLBACK 0x40000000 -static struct rpc_program cb_program = { +static const struct rpc_program cb_program = { .name = "nfs4_cb", .number = NFS4_CALLBACK, .nrvers = ARRAY_SIZE(nfs_cb_version), .version = nfs_cb_version, .stats = &cb_stats, - .pipe_dir_name = "/nfsd4_cb", + .pipe_dir_name = "nfsd4_cb", }; -static int max_cb_time(void) +static int max_cb_time(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return max(nn->nfsd4_lease/10, (time_t)1) * HZ; +} + +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) +{ + if (callback_cred) + return 0; + callback_cred = rpc_lookup_machine_cred("nfs"); + if (!callback_cred) + return -ENOMEM; + return 0; +} + +static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { - return max(nfsd4_lease/10, (time_t)1) * HZ; + if (clp->cl_minorversion == 0) { + return get_rpccred(callback_cred); + } else { + struct rpc_auth *auth = client->cl_auth; + struct auth_cred acred = {}; + + acred.uid = ses->se_cb_sec.uid; + acred.gid = ses->se_cb_sec.gid; + return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + } } +static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) +{ + struct rpc_xprt *xprt; + + if (args->protocol != XPRT_TRANSPORT_BC_TCP) + return rpc_create(args); + + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + + return rpc_create(args); +} static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .address = (struct sockaddr *) &conn->cb_addr, .addrsize = conn->cb_addrlen, .saddress = (struct sockaddr *) &conn->cb_saddr, .timeout = &timeparms, .program = &cb_program, .version = 0, - .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; + struct rpc_cred *cred; if (clp->cl_minorversion == 0) { - if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + if (!clp->cl_cred.cr_principal && + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - args.client_name = clp->cl_principal; + args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; + args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { if (!conn->cb_xprt) @@ -665,17 +690,23 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; + args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = rpc_create(&args); + client = create_backchannel_client(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); return PTR_ERR(client); } + cred = get_backchannel_cred(clp, client, ses); + if (IS_ERR(cred)) { + rpc_shutdown_client(client); + return PTR_ERR(cred); + } clp->cl_cb_client = client; + clp->cl_cb_cred = cred; return 0; - } static void warn_no_callback_path(struct nfs4_client *clp, int reason) @@ -712,18 +743,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { .rpc_call_done = nfsd4_cb_probe_done, }; -static struct rpc_cred *callback_cred; - -int set_callback_cred(void) -{ - if (callback_cred) - return 0; - callback_cred = rpc_lookup_machine_cred("nfs"); - if (!callback_cred) - return -ENOMEM; - return 0; -} - static struct workqueue_struct *callback_wq; static void run_nfsd4_cb(struct nfsd4_callback *cb) @@ -741,7 +760,6 @@ static void do_probe_callback(struct nfs4_client *clp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; cb->cb_msg.rpc_argp = NULL; cb->cb_msg.rpc_resp = NULL; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_probe_ops; @@ -754,9 +772,8 @@ static void do_probe_callback(struct nfs4_client *clp) */ void nfsd4_probe_callback(struct nfs4_client *clp) { - /* XXX: atomicity? Also, should we be using cl_cb_flags? */ clp->cl_cb_state = NFSD4_CB_UNKNOWN; - set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); do_probe_callback(clp); } @@ -796,8 +813,7 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; @@ -818,8 +834,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; dprintk("%s: minorversion=%d\n", __func__, clp->cl_minorversion); @@ -842,7 +857,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); @@ -915,7 +930,7 @@ void nfsd4_destroy_callback_queue(void) /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { - set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); + set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; * instead, nfsd4_do_callback_rpc() will detect the killed @@ -961,20 +976,22 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) if (clp->cl_cb_client) { rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; + put_rpccred(clp->cl_cb_cred); + clp->cl_cb_cred = NULL; } if (clp->cl_cb_conn.cb_xprt) { svc_xprt_put(clp->cl_cb_conn.cb_xprt); clp->cl_cb_conn.cb_xprt = NULL; } - if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) + if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) return; spin_lock(&clp->cl_lock); /* * Only serialized callback code is allowed to clear these * flags; main nfsd code can only set them: */ - BUG_ON(!clp->cl_cb_flags); - clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); c = __nfsd4_find_backchannel(clp); if (c) { @@ -986,7 +1003,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { - warn_no_callback_path(clp, err); + nfsd4_mark_cb_down(clp, err); return; } /* Yay, the callback channel's back! Restart any callbacks: */ @@ -994,13 +1011,13 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -void nfsd4_do_callback_rpc(struct work_struct *w) +static void nfsd4_do_callback_rpc(struct work_struct *w) { struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - if (clp->cl_cb_flags) + if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; @@ -1009,10 +1026,16 @@ void nfsd4_do_callback_rpc(struct work_struct *w) nfsd4_release_cb(cb); return; } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops, cb); } +void nfsd4_init_callback(struct nfsd4_callback *cb) +{ + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); +} + void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; @@ -1024,10 +1047,8 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; - cb->cb_msg.rpc_cred = callback_cred; cb->cb_ops = &nfsd4_cb_recall_ops; - dp->dl_retries = 1; INIT_LIST_HEAD(&cb->cb_per_client); cb->cb_done = true; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 94096273cd6..a0ab0a847d6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -36,9 +36,19 @@ #include <linux/seq_file.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/sunrpc/svc_xprt.h> #include <net/net_namespace.h> #include "idmap.h" #include "nfsd.h" +#include "netns.h" + +/* + * Turn off idmapping when using AUTH_SYS. + */ +static bool nfs4_disable_idmapping = true; +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off server's NFSv4 idmapping when using 'sec=sys'"); /* * Cache entry @@ -55,7 +65,7 @@ struct ent { struct cache_head h; int type; /* User / Group */ - uid_t id; + u32 id; char name[IDMAP_NAMESZ]; char authname[IDMAP_NAMESZ]; }; @@ -99,8 +109,6 @@ ent_alloc(void) * ID -> Name cache */ -static struct cache_head *idtoname_table[ENT_HASHMAX]; - static uint32_t idtoname_hash(struct ent *ent) { @@ -132,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -175,16 +177,16 @@ warn_no_idmapd(struct cache_detail *detail, int has_died) static int idtoname_parse(struct cache_detail *, char *, int); -static struct ent *idtoname_lookup(struct ent *); -static struct ent *idtoname_update(struct ent *, struct ent *); +static struct ent *idtoname_lookup(struct cache_detail *, struct ent *); +static struct ent *idtoname_update(struct cache_detail *, struct ent *, + struct ent *); -static struct cache_detail idtoname_cache = { +static struct cache_detail idtoname_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, - .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -236,7 +238,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) goto out; error = -ENOMEM; - res = idtoname_lookup(&ent); + res = idtoname_lookup(cd, &ent); if (!res) goto out; @@ -252,11 +254,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) else memcpy(ent.name, buf1, sizeof(ent.name)); error = -ENOMEM; - res = idtoname_update(&ent, res); + res = idtoname_update(cd, &ent, res); if (res == NULL) goto out; - cache_put(&res->h, &idtoname_cache); + cache_put(&res->h, cd); error = 0; out: @@ -267,10 +269,9 @@ out: static struct ent * -idtoname_lookup(struct ent *item) +idtoname_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache, - &item->h, + struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, idtoname_hash(item)); if (ch) return container_of(ch, struct ent, h); @@ -279,10 +280,9 @@ idtoname_lookup(struct ent *item) } static struct ent * -idtoname_update(struct ent *new, struct ent *old) +idtoname_update(struct cache_detail *cd, struct ent *new, struct ent *old) { - struct cache_head *ch = sunrpc_cache_update(&idtoname_cache, - &new->h, &old->h, + struct cache_head *ch = sunrpc_cache_update(cd, &new->h, &old->h, idtoname_hash(new)); if (ch) return container_of(ch, struct ent, h); @@ -295,8 +295,6 @@ idtoname_update(struct ent *new, struct ent *old) * Name -> ID cache */ -static struct cache_head *nametoid_table[ENT_HASHMAX]; - static inline int nametoid_hash(struct ent *ent) { @@ -317,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -351,17 +343,17 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) return 0; } -static struct ent *nametoid_lookup(struct ent *); -static struct ent *nametoid_update(struct ent *, struct ent *); +static struct ent *nametoid_lookup(struct cache_detail *, struct ent *); +static struct ent *nametoid_update(struct cache_detail *, struct ent *, + struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); -static struct cache_detail nametoid_cache = { +static struct cache_detail nametoid_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, - .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, @@ -418,14 +410,14 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) set_bit(CACHE_NEGATIVE, &ent.h.flags); error = -ENOMEM; - res = nametoid_lookup(&ent); + res = nametoid_lookup(cd, &ent); if (res == NULL) goto out; - res = nametoid_update(&ent, res); + res = nametoid_update(cd, &ent, res); if (res == NULL) goto out; - cache_put(&res->h, &nametoid_cache); + cache_put(&res->h, cd); error = 0; out: kfree(buf1); @@ -435,10 +427,9 @@ out: static struct ent * -nametoid_lookup(struct ent *item) +nametoid_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache, - &item->h, + struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, nametoid_hash(item)); if (ch) return container_of(ch, struct ent, h); @@ -447,10 +438,9 @@ nametoid_lookup(struct ent *item) } static struct ent * -nametoid_update(struct ent *new, struct ent *old) +nametoid_update(struct cache_detail *cd, struct ent *new, struct ent *old) { - struct cache_head *ch = sunrpc_cache_update(&nametoid_cache, - &new->h, &old->h, + struct cache_head *ch = sunrpc_cache_update(cd, &new->h, &old->h, nametoid_hash(new)); if (ch) return container_of(ch, struct ent, h); @@ -463,34 +453,55 @@ nametoid_update(struct ent *new, struct ent *old) */ int -nfsd_idmap_init(void) +nfsd_idmap_init(struct net *net) { int rv; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - rv = cache_register_net(&idtoname_cache, &init_net); + nn->idtoname_cache = cache_create_net(&idtoname_cache_template, net); + if (IS_ERR(nn->idtoname_cache)) + return PTR_ERR(nn->idtoname_cache); + rv = cache_register_net(nn->idtoname_cache, net); if (rv) - return rv; - rv = cache_register_net(&nametoid_cache, &init_net); + goto destroy_idtoname_cache; + nn->nametoid_cache = cache_create_net(&nametoid_cache_template, net); + if (IS_ERR(nn->nametoid_cache)) { + rv = PTR_ERR(nn->nametoid_cache); + goto unregister_idtoname_cache; + } + rv = cache_register_net(nn->nametoid_cache, net); if (rv) - cache_unregister_net(&idtoname_cache, &init_net); + goto destroy_nametoid_cache; + return 0; + +destroy_nametoid_cache: + cache_destroy_net(nn->nametoid_cache, net); +unregister_idtoname_cache: + cache_unregister_net(nn->idtoname_cache, net); +destroy_idtoname_cache: + cache_destroy_net(nn->idtoname_cache, net); return rv; } void -nfsd_idmap_shutdown(void) +nfsd_idmap_shutdown(struct net *net) { - cache_unregister_net(&idtoname_cache, &init_net); - cache_unregister_net(&nametoid_cache, &init_net); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + cache_unregister_net(nn->idtoname_cache, net); + cache_unregister_net(nn->nametoid_cache, net); + cache_destroy_net(nn->idtoname_cache, net); + cache_destroy_net(nn->nametoid_cache, net); } static int idmap_lookup(struct svc_rqst *rqstp, - struct ent *(*lookup_fn)(struct ent *), struct ent *key, - struct cache_detail *detail, struct ent **item) + struct ent *(*lookup_fn)(struct cache_detail *, struct ent *), + struct ent *key, struct cache_detail *detail, struct ent **item) { int ret; - *item = lookup_fn(key); + *item = lookup_fn(detail, key); if (!*item) return -ENOMEM; retry: @@ -498,7 +509,7 @@ idmap_lookup(struct svc_rqst *rqstp, if (ret == -ETIMEDOUT) { struct ent *prev_item = *item; - *item = lookup_fn(key); + *item = lookup_fn(detail, key); if (*item != prev_item) goto retry; cache_put(&(*item)->h, detail); @@ -517,72 +528,143 @@ rqst_authname(struct svc_rqst *rqstp) static __be32 idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, - uid_t *id) + u32 *id) { struct ent *item, key = { .type = type, }; int ret; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (namelen + 1 > sizeof(key.name)) return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); - ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); + ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) return nfserr_badowner; if (ret) return nfserrno(ret); *id = item->id; - cache_put(&item->h, &nametoid_cache); + cache_put(&item->h, nn->nametoid_cache); return 0; } -static int -idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) +{ + char buf[11]; + int len; + __be32 *p; + + len = sprintf(buf, "%u", id); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, buf, len); + return 0; +} + +static __be32 idmap_id_to_name(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { struct ent *item, key = { .id = id, .type = type, }; + __be32 *p; int ret; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); - ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); + ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) - return sprintf(name, "%u", id); + return encode_ascii_id(xdr, id); if (ret) - return ret; + return nfserrno(ret); ret = strlen(item->name); - BUG_ON(ret > IDMAP_NAMESZ); - memcpy(name, item->name, ret); - cache_put(&item->h, &idtoname_cache); - return ret; + WARN_ON_ONCE(ret > IDMAP_NAMESZ); + p = xdr_reserve_space(xdr, ret + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, item->name, ret); + cache_put(&item->h, nn->idtoname_cache); + return 0; +} + +static bool +numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) +{ + int ret; + char buf[11]; + + if (namelen + 1 > sizeof(buf)) + /* too long to represent a 32-bit id: */ + return false; + /* Just to make sure it's null-terminated: */ + memcpy(buf, name, namelen); + buf[namelen] = '\0'; + ret = kstrtouint(buf, 10, id); + return ret == 0; +} + +static __be32 +do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) +{ + if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + if (numeric_name_to_id(rqstp, type, name, namelen, id)) + return 0; + /* + * otherwise, fall through and try idmapping, for + * backwards compatibility with clients sending names: + */ + return idmap_name_to_id(rqstp, type, name, namelen, id); +} + +static __be32 encode_name_from_id(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) +{ + if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + return encode_ascii_id(xdr, id); + return idmap_id_to_name(xdr, rqstp, type, id); } __be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kuid_t *uid) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); + *uid = make_kuid(&init_user_ns, id); + if (!uid_valid(*uid)) + status = nfserr_badowner; + return status; } __be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kgid_t *gid) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); + *gid = make_kgid(&init_user_ns, id); + if (!gid_valid(*gid)) + status = nfserr_badowner; + return status; } -int -nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kuid_t uid) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); + u32 id = from_kuid(&init_user_ns, uid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } -int -nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kgid_t gid) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); + u32 id = from_kgid(&init_user_ns, gid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 896da74ec56..8f029db5d27 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -39,6 +39,39 @@ #include "cache.h" #include "xdr4.h" #include "vfs.h" +#include "current_stateid.h" +#include "netns.h" +#include "acl.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -189,13 +222,25 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfserr_symlink; } +static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) +{ + if (nfsd4_has_session(cstate)) + return; + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh->fh_handle); +} + static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { - struct svc_fh resfh; + struct svc_fh *current_fh = &cstate->current_fh; + int accmode; __be32 status; - fh_init(&resfh, NFS4_FHSIZE); + *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + if (!*resfh) + return nfserr_jukebox; + fh_init(*resfh, NFS4_FHSIZE); open->op_truncate = 0; if (open->op_create) { @@ -220,10 +265,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, - &resfh, open->op_createmode, + *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, use the returned bitmask * to indicate which attributes we used to store the @@ -231,40 +279,42 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); - } else { + FATTR4_WORD1_TIME_MODIFY); + } else + /* + * Note this may exit with the parent still locked. + * We will hold the lock until nfsd4_open's final + * lookup, to prevent renames or unlinks until we've had + * a chance to an acquire a delegation if appropriate. + */ status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, &resfh); - fh_unlock(current_fh); - if (status) - goto out; - status = nfsd_check_obj_isreg(&resfh); - } + open->op_fname.data, open->op_fname.len, *resfh); + if (status) + goto out; + status = nfsd_check_obj_isreg(*resfh); if (status) goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); - + do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); + + nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); + accmode = NFSD_MAY_NOP; + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, *resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); - fh_dup2(current_fh, &resfh); - - /* set reply cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &resfh.fh_handle); - if (!open->op_created) - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_NOP); - out: - fh_put(&resfh); return status; } static __be32 -do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; __be32 status; + int accmode = 0; /* We don't know the target directory, and therefore can not * set the change info @@ -272,15 +322,23 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); - /* set replay cache */ - fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - ¤t_fh->fh_handle); + nfsd4_set_open_owner_reply_cache(cstate, open, current_fh); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); + /* + * In the delegation case, the client is telling us about an + * open that it *already* performed locally, some time ago. We + * should let it succeed now if possible. + * + * In the case of a CLAIM_FH open, on the other hand, the client + * may be counting on us to enforce permissions (the Linux 4.1 + * client uses this for normal opens, for example). + */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) + accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, accmode); return status; } @@ -300,7 +358,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { __be32 status; + struct svc_fh *resfh = NULL; struct nfsd4_compoundres *resp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, @@ -310,16 +371,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; - /* We don't yet support WANT bits: */ - open->op_share_access &= NFS4_SHARE_ACCESS_MASK; - open->op_created = 0; /* * RFC5661 18.51.3 * Before RECLAIM_COMPLETE done, server should deny new lock */ if (nfsd4_has_session(cstate) && - !cstate->session->se_client->cl_firststate && + !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_grace; @@ -330,7 +389,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check seqid for replay. set nfs4_owner */ resp = rqstp->rq_resp; - status = nfsd4_process_open1(&resp->cstate, open); + status = nfsd4_process_open1(&resp->cstate, open, nn); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); @@ -345,6 +404,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (status) goto out; + if (open->op_xdr_error) { + status = open->op_xdr_error; + goto out; + } status = nfsd4_check_open_attributes(rqstp, cstate, open); if (status) @@ -353,35 +416,35 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, &cstate->current_fh, - open); + status = do_open_lookup(rqstp, cstate, open, &resfh); if (status) goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - status = nfs4_check_open_reclaim(&open->op_clientid); + status = nfs4_check_open_reclaim(&open->op_clientid, + cstate->minorversion, + nn); if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, &cstate->current_fh, - open); + status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; + resfh = &cstate->current_fh; break; case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; @@ -397,18 +460,42 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * successful, it (1) truncates the file if open->op_truncate was * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ - status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); + status = nfsd4_process_open2(rqstp, resfh, open); WARN_ON(status && open->op_created); out: + if (resfh && resfh != &cstate->current_fh) { + fh_dup2(&cstate->current_fh, resfh); + fh_put(resfh); + kfree(resfh); + } nfsd4_cleanup_open_state(open, status); - if (open->op_openowner) + if (open->op_openowner && !nfsd4_has_session(cstate)) cstate->replay_owner = &open->op_openowner->oo_owner; - else + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) nfs4_unlock_state(); return status; } /* + * OPEN is the only seqid-mutating operation whose decoding can fail + * with a seqid-mutating error (specifically, decoding of user names in + * the attributes). Therefore we have to do some processing to look up + * the stateowner so that we can bump the seqid. + */ +static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op) +{ + struct nfsd4_open *open = (struct nfsd4_open *)&op->u; + + if (!seqid_mutating_err(ntohl(op->status))) + return op->status; + if (nfsd4_has_session(cstate)) + return op->status; + open->op_xdr_error = op->status; + return nfsd4_open(rqstp, cstate, open); +} + +/* * filehandle-manipulating ops. */ static __be32 @@ -452,6 +539,10 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); + if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } return nfs_ok; } @@ -463,6 +554,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; fh_dup2(&cstate->save_fh, &cstate->current_fh); + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + } return nfs_ok; } @@ -481,14 +576,21 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } +static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) +{ + __be32 verf[2]; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + verf[0] = (__be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + memcpy(verifier->data, verf, sizeof(verifier->data)); +} + static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - u32 *p = (u32 *)commit->co_verf.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; - + gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); } @@ -515,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. - */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, @@ -576,6 +669,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -669,18 +765,26 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the + * following compound. + * + * To ensure proper ordering, we therefore turn off zero copy if + * the client wants us to do more in this compound: + */ + if (!nfsd4_last_compound_op(rqstp)) + rqstp->rq_splice_ok = false; + /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, + if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, &read->rd_stateid, RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -726,7 +830,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (locks_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -745,27 +849,17 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags - & NFSEXP_NOSUBTREECHECK)) + if (locks_in_grace(SVC_NET(rqstp)) && + !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, rename->rn_tname, rename->rn_tnamelen); - - /* the underlying filesystem returns different error's than required - * by NFSv4. both save_fh and current_fh have been verified.. */ - if (status == nfserr_isdir) - status = nfserr_exist; - else if ((status == nfserr_notdir) && - (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && - S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) - status = nfserr_exist; - - if (!status) { - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); - } - return status; + if (status) + return status; + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + return nfs_ok; } static __be32 @@ -826,20 +920,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) { __be32 status = nfs_ok; + int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(cstate, + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, &setattr->sa_stateid, WR_STATE, NULL); - nfs4_unlock_state(); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; } } - status = fh_want_write(&cstate->current_fh); - if (status) - return status; + err = fh_want_write(&cstate->current_fh); + if (err) + return nfserrno(err); status = nfs_ok; status = check_attr_support(rqstp, cstate, setattr->sa_bmval, @@ -852,6 +945,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: @@ -859,27 +957,41 @@ out: return status; } +static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) +{ + int i = 1; + int buflen = write->wr_buflen; + + vec[0].iov_base = write->wr_head.iov_base; + vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); + buflen -= vec[0].iov_len; + + while (buflen) { + vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); + vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); + buflen -= vec[i].iov_len; + i++; + } + return i; +} + static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_write *write) { stateid_t *stateid = &write->wr_stateid; struct file *filp = NULL; - u32 *p; __be32 status = nfs_ok; unsigned long cnt; + int nvecs; /* no need to check permission - this will be done in nfsd_write() */ if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, stateid, WR_STATE, &filp); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; @@ -887,12 +999,13 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; - p = (u32 *)write->wr_verifier.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; + gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); + + nvecs = fill_in_write_vector(rqstp->rq_vec, write); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, write->wr_vlen, + write->wr_offset, rqstp->rq_vec, nvecs, &cnt, &write->wr_how_written); if (filp) fput(filp); @@ -937,14 +1050,17 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; - status = nfsd4_encode_fattr(&cstate->current_fh, + p = buf; + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); - - /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + /* + * If nfsd4_encode_fattr() ran out of space, assume that's because + * the attributes are longer (hence different) than those given: + */ + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; @@ -1000,6 +1116,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum) typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); +typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *); +typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *); enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ @@ -1025,6 +1143,10 @@ enum nfsd4_op_flags { * the v4.0 case). */ OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, }; struct nfsd4_operation { @@ -1033,6 +1155,8 @@ struct nfsd4_operation { char *op_name; /* Try to get response size before operation */ nfsd4op_rsize op_rsize_bop; + stateid_getter op_get_currentstateid; + stateid_setter op_set_currentstateid; }; static struct nfsd4_operation nfsd4_ops[]; @@ -1079,6 +1203,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) bool nfsd4_cache_this_op(struct nfsd4_op *op) { + if (op->opnum == OP_ILLEGAL) + return false; return OPDESC(op)->op_flags & OP_CACHEME; } @@ -1115,6 +1241,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->iov = head; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +} + /* * COMPOUND call. */ @@ -1126,26 +1271,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_op *op; struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; - int slack_bytes; - u32 plen = 0; + struct svc_fh *current_fh = &cstate->current_fh; + struct svc_fh *save_fh = &cstate->save_fh; __be32 status; - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + - rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + svcxdr_init_encode(rqstp, resp); + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; - resp->cstate.minorversion = args->minorversion; - resp->cstate.replay_owner = NULL; - resp->cstate.session = NULL; - fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); - fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + cstate->minorversion = args->minorversion; + fh_init(current_fh, NFS4_FHSIZE); + fh_init(save_fh, NFS4_FHSIZE); /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. @@ -1156,7 +1295,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > nfsd_supported_minorversion) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; status = nfs41_check_op_ordering(args); @@ -1177,62 +1316,71 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * for example, if there is a miscellaneous XDR error * it will be set to nfserr_bad_xdr. */ - if (op->status) - goto encode_op; - - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->end - (char *)resp->p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; + if (op->status) { + if (op->opnum == OP_OPEN) + op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } opdesc = OPDESC(op); - if (!cstate->current_fh.fh_dentry) { + if (!current_fh->fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export->ex_fslocs.migrated && !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } + fh_clear_wcc(current_fh); + /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { - plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, plen); } if (op->status) goto encode_op; - if (opdesc->op_func) - op->status = opdesc->op_func(rqstp, cstate, &op->u); - else - BUG_ON(op->status == nfs_ok); + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); + op->status = opdesc->op_func(rqstp, cstate, &op->u); + + if (!op->status) { + if (opdesc->op_set_currentstateid) + opdesc->op_set_currentstateid(cstate, &op->u); - if (!op->status && need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + if (opdesc->op_flags & OP_CLEAR_STATEID) + clear_current_stateid(cstate); + + if (need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(current_fh->fh_export, rqstp); + } encode_op: /* Only from SEQUENCE */ - if (resp->cstate.status == nfserr_replay_cache) { + if (cstate->status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); status = op->status; goto out; } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); @@ -1254,10 +1402,10 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } - resp->cstate.status = status; - fh_put(&resp->cstate.current_fh); - fh_put(&resp->cstate.save_fh); - BUG_ON(resp->cstate.replay_owner); + cstate->status = status; + fh_put(current_fh); + fh_put(save_fh); + BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ rqstp->rq_usedeferral = 1; @@ -1271,7 +1419,8 @@ out: #define op_encode_change_info_maxsz (5) #define nfs4_fattr_bitmap_maxsz (4) -#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -1303,6 +1452,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFSD4_MAX_SEC_LABEL_LEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1333,18 +1525,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) if (rlen > maxcount) rlen = maxcount; - return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { + u32 maxcount = svc_max_payload(rqstp); u32 rlen = op->u.readdir.rd_maxcount; - if (rlen > PAGE_SIZE) - rlen = PAGE_SIZE; + if (rlen > maxcount) + rlen = maxcount; - return (op_encode_hdr_size + op_encode_verifier_maxsz) - * sizeof(__be32) + rlen; + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) @@ -1359,6 +1552,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op + op_encode_change_info_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return NFS4_MAX_SESSIONID_LEN + 20; +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -1366,18 +1565,20 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); } static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ - 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 1 + 1 + /* eir_flags, spr_how */\ + 4 + /* spo_must_enforce & _allow with bitmap */\ 2 + /*eir_server_owner.so_minor_id */\ /* eir_server_owner.so_major_id<> */\ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ @@ -1413,6 +1614,8 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_CLOSE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid, }, [OP_COMMIT] = { .op_func = (nfsd4op_func)nfsd4_commit, @@ -1422,7 +1625,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID, .op_name = "OP_CREATE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, }, @@ -1431,10 +1634,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_DELEGRETURN", .op_rsize_bop = nfsd4_only_status_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid, }, [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, .op_name = "OP_GETATTR", }, [OP_GETFH] = { @@ -1453,6 +1658,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCK", .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, @@ -1463,15 +1669,16 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCKU", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid, }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { @@ -1483,6 +1690,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN", .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid, }, [OP_OPEN_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_open_confirm, @@ -1495,37 +1703,38 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN_DOWNGRADE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid, }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, @@ -1576,6 +1785,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_SETATTR", .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, @@ -1600,6 +1810,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_WRITE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, @@ -1617,6 +1828,12 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_EXCHANGE_ID", .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, }, + [OP_BACKCHANNEL_CTL] = { + .op_func = (nfsd4op_func)nfsd4_backchannel_ctl, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_BACKCHANNEL_CTL", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_BIND_CONN_TO_SESSION] = { .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP @@ -1670,10 +1887,32 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_free_stateid, .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, .op_name = "OP_FREE_STATEID", + .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, }; +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) @@ -1711,6 +1950,7 @@ struct svc_version nfsd_version4 = { .vs_proc = nfsd_procedures4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = 1, }; /* diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 0b3e875d1ab..9c271f42604 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 The Regents of the University of Michigan. +* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com> * All rights reserved. * * Andy Adamson <andros@citi.umich.edu> @@ -36,15 +37,31 @@ #include <linux/namei.h> #include <linux/crypto.h> #include <linux/sched.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <net/net_namespace.h> +#include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/cld.h> #include "nfsd.h" #include "state.h" #include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC +/* Declarations */ +struct nfsd4_client_tracking_ops { + int (*init)(struct net *); + void (*exit)(struct net *); + void (*create)(struct nfs4_client *); + void (*remove)(struct nfs4_client *); + int (*check)(struct nfs4_client *); + void (*grace_done)(struct nfsd_net *, time_t); +}; + /* Globals */ -static struct file *rec_file; static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; static int @@ -56,8 +73,8 @@ nfs4_save_creds(const struct cred **original_creds) if (!new) return -ENOMEM; - new->fsuid = 0; - new->fsgid = 0; + new->fsuid = GLOBAL_ROOT_UID; + new->fsgid = GLOBAL_ROOT_GID; *original_creds = override_creds(new); put_cred(new); return 0; @@ -83,33 +100,39 @@ md5_to_hex(char *out, char *md5) *out = '\0'; } -__be32 -nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +static int +nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; struct scatterlist sg; - __be32 status = nfserr_jukebox; + int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + if (IS_ERR(desc.tfm)) { + status = PTR_ERR(desc.tfm); goto out_no_tfm; + } + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) + if (cksum.data == NULL) { + status = -ENOMEM; goto out; + } sg_init_one(&sg, clname->data, clname->len); - if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) + status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); + if (status) goto out; md5_to_hex(dname, cksum.data); - status = nfs_ok; + status = 0; out: kfree(cksum.data); crypto_free_hash(desc.tfm); @@ -117,25 +140,57 @@ out_no_tfm: return status; } -void nfsd4_create_clid_dir(struct nfs4_client *clp) +/* + * If we had an error generating the recdir name for the legacy tracker + * then warn the admin. If the error doesn't appear to be transient, + * then disable recovery tracking. + */ +static void +legacy_recdir_name_error(struct nfs4_client *clp, int error) +{ + printk(KERN_ERR "NFSD: unable to generate recoverydir " + "name (%d).\n", error); + + /* + * if the algorithm just doesn't exist, then disable the recovery + * tracker altogether. The crypto libs will generally return this if + * FIPS is enabled as well. + */ + if (error == -ENOENT) { + printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " + "Reboot recovery will not function correctly!\n"); + nfsd4_client_tracking_exit(clp->net); + } +} + +static void +nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; - char *dname = clp->cl_recdir; + char dname[HEXDIR_LEN]; struct dentry *dir, *dentry; + struct nfs4_client_reclaim *crp; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - - if (clp->cl_firststate) + if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - clp->cl_firststate = 1; - if (!rec_file) + if (!nn->rec_file) return; + + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(clp, status); + status = nfs4_save_creds(&original_cred); if (status < 0) return; - dir = rec_file->f_path.dentry; + status = mnt_want_write_file(nn->rec_file); + if (status) + return; + + dir = nn->rec_file->f_path.dentry; /* lock the parent */ mutex_lock(&dir->d_inode->i_mutex); @@ -154,37 +209,45 @@ void nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = mnt_want_write_file(rec_file); - if (status) - goto out_put; status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); - mnt_drop_write_file(rec_file); out_put: dput(dentry); out_unlock: mutex_unlock(&dir->d_inode->i_mutex); - if (status == 0) - vfs_fsync(rec_file, 0); - else + if (status == 0) { + if (nn->in_grace) { + crp = nfs4_client_to_reclaim(dname, nn); + if (crp) + crp->cr_clp = clp; + } + vfs_fsync(nn->rec_file, 0); + } else { printk(KERN_ERR "NFSD: failed to write recovery record" " (err %d); please check that %s exists" " and is writeable", status, user_recovery_dirname); + } + mnt_drop_write_file(nn->rec_file); nfs4_reset_creds(original_cred); } -typedef int (recdir_func)(struct dentry *, struct dentry *); +typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); struct name_list { char name[HEXDIR_LEN]; struct list_head list; }; +struct nfs4_dir_ctx { + struct dir_context ctx; + struct list_head names; +}; + static int nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct list_head *names = arg; + struct nfs4_dir_ctx *ctx = arg; struct name_list *entry; if (namlen != HEXDIR_LEN - 1) @@ -194,33 +257,36 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, return -ENOMEM; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; - list_add(&entry->list, names); + list_add(&entry->list, &ctx->names); return 0; } static int -nfsd4_list_rec_dir(recdir_func *f) +nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; - struct dentry *dir = rec_file->f_path.dentry; - LIST_HEAD(names); + struct dentry *dir = nn->rec_file->f_path.dentry; + struct nfs4_dir_ctx ctx = { + .ctx.actor = nfsd4_build_namelist, + .names = LIST_HEAD_INIT(ctx.names) + }; int status; status = nfs4_save_creds(&original_cred); if (status < 0) return status; - status = vfs_llseek(rec_file, 0, SEEK_SET); + status = vfs_llseek(nn->rec_file, 0, SEEK_SET); if (status < 0) { nfs4_reset_creds(original_cred); return status; } - status = vfs_readdir(rec_file, nfsd4_build_namelist, &names); + status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - while (!list_empty(&names)) { + while (!list_empty(&ctx.names)) { struct name_list *entry; - entry = list_entry(names.next, struct name_list, list); + entry = list_entry(ctx.names.next, struct name_list, list); if (!status) { struct dentry *dentry; dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); @@ -228,7 +294,7 @@ nfsd4_list_rec_dir(recdir_func *f) status = PTR_ERR(dentry); break; } - status = f(dir, dentry); + status = f(dir, dentry, nn); dput(dentry); } list_del(&entry->list); @@ -240,14 +306,14 @@ nfsd4_list_rec_dir(recdir_func *f) } static int -nfsd4_unlink_clid_dir(char *name, int namlen) +nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) { struct dentry *dir, *dentry; int status; dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - dir = rec_file->f_path.dentry; + dir = nn->rec_file->f_path.dentry; mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, dir, namlen); if (IS_ERR(dentry)) { @@ -265,95 +331,113 @@ out_unlock: return status; } -void +static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; + struct nfs4_client_reclaim *crp; + char dname[HEXDIR_LEN]; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!rec_file || !clp->cl_firststate) + if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - status = mnt_want_write_file(rec_file); + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(clp, status); + + status = mnt_want_write_file(nn->rec_file); if (status) goto out; - clp->cl_firststate = 0; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); status = nfs4_save_creds(&original_cred); if (status < 0) - goto out; + goto out_drop_write; - status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); + status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); nfs4_reset_creds(original_cred); - if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + if (status == 0) { + vfs_fsync(nn->rec_file, 0); + if (nn->in_grace) { + /* remove reclaim record */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) + nfs4_remove_reclaim_record(crp, nn); + } + } +out_drop_write: + mnt_drop_write_file(nn->rec_file); out: if (status) printk("NFSD: Failed to remove expired client state directory" - " %.*s\n", HEXDIR_LEN, clp->cl_recdir); - return; + " %.*s\n", HEXDIR_LEN, dname); } static int -purge_old(struct dentry *parent, struct dentry *child) +purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; - if (nfs4_has_reclaimed_state(child->d_name.name, false)) + if (nfs4_has_reclaimed_state(child->d_name.name, nn)) return 0; status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %s\n", - child->d_name.name); + printk("failed to remove client recovery directory %pd\n", + child); /* Keep trying, success or failure: */ return 0; } -void -nfsd4_recdir_purge_old(void) { +static void +nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) +{ int status; - if (!rec_file) + nn->in_grace = false; + if (!nn->rec_file) return; - status = mnt_want_write_file(rec_file); + status = mnt_want_write_file(nn->rec_file); if (status) goto out; - status = nfsd4_list_rec_dir(purge_old); + status = nfsd4_list_rec_dir(purge_old, nn); if (status == 0) - vfs_fsync(rec_file, 0); - mnt_drop_write_file(rec_file); + vfs_fsync(nn->rec_file, 0); + mnt_drop_write_file(nn->rec_file); out: + nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); } static int -load_recdir(struct dentry *parent, struct dentry *child) +load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %s in recovery directory\n", - child->d_name.name); + printk("nfsd4: illegal name %pd in recovery directory\n", + child); /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name); + nfs4_client_to_reclaim(child->d_name.name, nn); return 0; } -int -nfsd4_recdir_load(void) { +static int +nfsd4_recdir_load(struct net *net) { int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (!rec_file) + if (!nn->rec_file) return 0; - status = nfsd4_list_rec_dir(load_recdir); + status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_file->f_path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); return status; } @@ -361,42 +445,123 @@ nfsd4_recdir_load(void) { * Hold reference to the recovery directory. */ -void -nfsd4_init_recdir() +static int +nfsd4_init_recdir(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct cred *original_cred; int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", user_recovery_dirname); - BUG_ON(rec_file); + BUG_ON(nn->rec_file); status = nfs4_save_creds(&original_cred); if (status < 0) { printk("NFSD: Unable to change credentials to find recovery" " directory: error %d\n", status); - return; + return status; } - rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); - if (IS_ERR(rec_file)) { + nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); + if (IS_ERR(nn->rec_file)) { printk("NFSD: unable to find recovery directory %s\n", user_recovery_dirname); - rec_file = NULL; + status = PTR_ERR(nn->rec_file); + nn->rec_file = NULL; } nfs4_reset_creds(original_cred); + if (!status) + nn->in_grace = true; + return status; } -void -nfsd4_shutdown_recdir(void) + +static int +nfs4_legacy_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + + return 0; +} + +static void +nfs4_legacy_state_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + kfree(nn->reclaim_str_hashtbl); +} + +static int +nfsd4_load_reboot_recovery_data(struct net *net) +{ + int status; + + status = nfsd4_init_recdir(net); + if (!status) + status = nfsd4_recdir_load(net); + if (status) + printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; +} + +static int +nfsd4_legacy_tracking_init(struct net *net) { - if (!rec_file) + int status; + + /* XXX: The legacy code won't work in a container */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " + "tracking in a container!\n"); + return -EINVAL; + } + + status = nfs4_legacy_state_init(net); + if (status) + return status; + + status = nfsd4_load_reboot_recovery_data(net); + if (status) + goto err; + return 0; + +err: + nfs4_legacy_state_shutdown(net); + return status; +} + +static void +nfsd4_shutdown_recdir(struct nfsd_net *nn) +{ + if (!nn->rec_file) return; - fput(rec_file); - rec_file = NULL; + fput(nn->rec_file); + nn->rec_file = NULL; +} + +static void +nfsd4_legacy_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_shutdown_recdir(nn); + nfs4_legacy_state_shutdown(net); } /* @@ -425,3 +590,829 @@ nfs4_recoverydir(void) { return user_recovery_dirname; } + +static int +nfsd4_check_legacy_client(struct nfs4_client *clp) +{ + int status; + char dname[HEXDIR_LEN]; + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + /* did we already find that this client is stable? */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) { + legacy_recdir_name_error(clp, status); + return status; + } + + /* look for it in the reclaim hashtable otherwise */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) { + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp->cr_clp = clp; + return 0; + } + + return -ENOENT; +} + +static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { + .init = nfsd4_legacy_tracking_init, + .exit = nfsd4_legacy_tracking_exit, + .create = nfsd4_create_clid_dir, + .remove = nfsd4_remove_clid_dir, + .check = nfsd4_check_legacy_client, + .grace_done = nfsd4_recdir_purge_old, +}; + +/* Globals */ +#define NFSD_PIPE_DIR "nfsd" +#define NFSD_CLD_PIPE "cld" + +/* per-net-ns structure for holding cld upcall info */ +struct cld_net { + struct rpc_pipe *cn_pipe; + spinlock_t cn_lock; + struct list_head cn_list; + unsigned int cn_xid; +}; + +struct cld_upcall { + struct list_head cu_list; + struct cld_net *cu_net; + struct task_struct *cu_task; + struct cld_msg cu_msg; +}; + +static int +__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + struct rpc_pipe_msg msg; + + memset(&msg, 0, sizeof(msg)); + msg.data = cmsg; + msg.len = sizeof(*cmsg); + + /* + * Set task state before we queue the upcall. That prevents + * wake_up_process in the downcall from racing with schedule. + */ + set_current_state(TASK_UNINTERRUPTIBLE); + ret = rpc_queue_upcall(pipe, &msg); + if (ret < 0) { + set_current_state(TASK_RUNNING); + goto out; + } + + schedule(); + set_current_state(TASK_RUNNING); + + if (msg.errno < 0) + ret = msg.errno; +out: + return ret; +} + +static int +cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + + /* + * -EAGAIN occurs when pipe is closed and reopened while there are + * upcalls queued. + */ + do { + ret = __cld_pipe_upcall(pipe, cmsg); + } while (ret == -EAGAIN); + + return ret; +} + +static ssize_t +cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) +{ + struct cld_upcall *tmp, *cup; + struct cld_msg __user *cmsg = (struct cld_msg __user *)src; + uint32_t xid; + struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + if (mlen != sizeof(*cmsg)) { + dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, + sizeof(*cmsg)); + return -EINVAL; + } + + /* copy just the xid so we can try to find that */ + if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) { + dprintk("%s: error when copying xid from userspace", __func__); + return -EFAULT; + } + + /* walk the list and find corresponding xid */ + cup = NULL; + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { + cup = tmp; + list_del_init(&cup->cu_list); + break; + } + } + spin_unlock(&cn->cn_lock); + + /* couldn't find upcall? */ + if (!cup) { + dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid); + return -EINVAL; + } + + if (copy_from_user(&cup->cu_msg, src, mlen) != 0) + return -EFAULT; + + wake_up_process(cup->cu_task); + return mlen; +} + +static void +cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct cld_msg *cmsg = msg->data; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, + cu_msg); + + /* errno >= 0 means we got a downcall */ + if (msg->errno >= 0) + return; + + wake_up_process(cup->cu_task); +} + +static const struct rpc_pipe_ops cld_upcall_ops = { + .upcall = rpc_pipe_generic_upcall, + .downcall = cld_pipe_downcall, + .destroy_msg = cld_pipe_destroy_msg, +}; + +static struct dentry * +nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe) +{ + struct dentry *dir, *dentry; + + dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR); + if (dir == NULL) + return ERR_PTR(-ENOENT); + dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); + dput(dir); + return dentry; +} + +static void +nfsd4_cld_unregister_sb(struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static struct dentry * +nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + struct dentry *dentry; + + sb = rpc_get_sb_net(net); + if (!sb) + return NULL; + dentry = nfsd4_cld_register_sb(sb, pipe); + rpc_put_sb_net(net); + return dentry; +} + +static void +nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + + sb = rpc_get_sb_net(net); + if (sb) { + nfsd4_cld_unregister_sb(pipe); + rpc_put_sb_net(net); + } +} + +/* Initialize rpc_pipefs pipe for communication with client tracking daemon */ +static int +nfsd4_init_cld_pipe(struct net *net) +{ + int ret; + struct dentry *dentry; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn; + + if (nn->cld_net) + return 0; + + cn = kzalloc(sizeof(*cn), GFP_KERNEL); + if (!cn) { + ret = -ENOMEM; + goto err; + } + + cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(cn->cn_pipe)) { + ret = PTR_ERR(cn->cn_pipe); + goto err; + } + spin_lock_init(&cn->cn_lock); + INIT_LIST_HEAD(&cn->cn_list); + + dentry = nfsd4_cld_register_net(net, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err_destroy_data; + } + + cn->cn_pipe->dentry = dentry; + nn->cld_net = cn; + return 0; + +err_destroy_data: + rpc_destroy_pipe_data(cn->cn_pipe); +err: + kfree(cn); + printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n", + ret); + return ret; +} + +static void +nfsd4_remove_cld_pipe(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + nfsd4_cld_unregister_net(net, cn->cn_pipe); + rpc_destroy_pipe_data(cn->cn_pipe); + kfree(nn->cld_net); + nn->cld_net = NULL; +} + +static struct cld_upcall * +alloc_cld_upcall(struct cld_net *cn) +{ + struct cld_upcall *new, *tmp; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return new; + + /* FIXME: hard cap on number in flight? */ +restart_search: + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (tmp->cu_msg.cm_xid == cn->cn_xid) { + cn->cn_xid++; + spin_unlock(&cn->cn_lock); + goto restart_search; + } + } + new->cu_task = current; + new->cu_msg.cm_vers = CLD_UPCALL_VERSION; + put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); + new->cu_net = cn; + list_add(&new->cu_list, &cn->cn_list); + spin_unlock(&cn->cn_lock); + + dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid); + + return new; +} + +static void +free_cld_upcall(struct cld_upcall *victim) +{ + struct cld_net *cn = victim->cu_net; + + spin_lock(&cn->cn_lock); + list_del(&victim->cu_list); + spin_unlock(&cn->cn_lock); + kfree(victim); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_create(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already stored */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Create; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to create client " + "record on stable storage: %d\n", ret); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_remove(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already removed */ + if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Remove; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to remove client " + "record from stable storage: %d\n", ret); +} + +/* Check for presence of a record, and update its timestamp */ +static int +nfsd4_cld_check(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if one was already stored during this grace pd */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + cup = alloc_cld_upcall(cn); + if (!cup) { + printk(KERN_ERR "NFSD: Unable to check client record on " + "stable storage: %d\n", -ENOMEM); + return -ENOMEM; + } + + cup->cu_msg.cm_cmd = Cld_Check; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); + return ret; +} + +static void +nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceDone; + cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); +} + +static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { + .init = nfsd4_init_cld_pipe, + .exit = nfsd4_remove_cld_pipe, + .create = nfsd4_cld_create, + .remove = nfsd4_cld_remove, + .check = nfsd4_cld_check, + .grace_done = nfsd4_cld_grace_done, +}; + +/* upcall via usermodehelper */ +static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; +module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), + S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program"); + +static bool cltrack_legacy_disable; +module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_legacy_disable, + "Disable legacy recoverydir conversion. Default: false"); + +#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" +#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" + +static char * +nfsd4_cltrack_legacy_topdir(void) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + len = strlen(LEGACY_TOPDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s", + nfs4_recoverydir()); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + +static char * +nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + /* +1 is for '/' between "topdir" and "recdir" */ + len = strlen(LEGACY_RECDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/", + nfs4_recoverydir()); + if (copied > (len - HEXDIR_LEN)) { + /* just return nothing if output will be truncated */ + kfree(result); + return NULL; + } + + copied = nfs4_make_rec_clidname(result + copied, name); + if (copied) { + kfree(result); + return NULL; + } + + return result; +} + +static int +nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) +{ + char *envp[2]; + char *argv[4]; + int ret; + + if (unlikely(!cltrack_prog[0])) { + dprintk("%s: cltrack_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s: cmd: %s\n", __func__, cmd); + dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); + dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); + + envp[0] = legacy; + envp[1] = NULL; + + argv[0] = (char *)cltrack_prog; + argv[1] = cmd; + argv[2] = arg; + argv[3] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or EACCES + * error. The admin can re-enable it on the fly by using sysfs + * once the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + dprintk("NFSD: %s was not found or isn't executable (%d). " + "Setting cltrack_prog to blank string!", + cltrack_prog, ret); + cltrack_prog[0] = '\0'; + } + dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret); + + return ret; +} + +static char * +bin_to_hex_dup(const unsigned char *src, int srclen) +{ + int i; + char *buf, *hex; + + /* +1 for terminating NULL */ + buf = kmalloc((srclen * 2) + 1, GFP_KERNEL); + if (!buf) + return buf; + + hex = buf; + for (i = 0; i < srclen; i++) { + sprintf(hex, "%2.2x", *src++); + hex += 2; + } + return buf; +} + +static int +nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) +{ + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } + return nfsd4_umh_cltrack_upcall("init", NULL, NULL); +} + +static void +nfsd4_umh_cltrack_create(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("create", hexid, NULL); + kfree(hexid); +} + +static void +nfsd4_umh_cltrack_remove(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("remove", hexid, NULL); + kfree(hexid); +} + +static int +nfsd4_umh_cltrack_check(struct nfs4_client *clp) +{ + int ret; + char *hexid, *legacy; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return -ENOMEM; + } + legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); + ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); + kfree(legacy); + kfree(hexid); + return ret; +} + +static void +nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, + time_t boot_time) +{ + char *legacy; + char timestr[22]; /* FIXME: better way to determine max size? */ + + sprintf(timestr, "%ld", boot_time); + legacy = nfsd4_cltrack_legacy_topdir(); + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); + kfree(legacy); +} + +static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { + .init = nfsd4_umh_cltrack_init, + .exit = NULL, + .create = nfsd4_umh_cltrack_create, + .remove = nfsd4_umh_cltrack_remove, + .check = nfsd4_umh_cltrack_check, + .grace_done = nfsd4_umh_cltrack_grace_done, +}; + +int +nfsd4_client_tracking_init(struct net *net) +{ + int status; + struct path path; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + /* just run the init if it the method is already decided */ + if (nn->client_tracking_ops) + goto do_init; + + /* + * First, try a UMH upcall. It should succeed or fail quickly, so + * there's little harm in trying that first. + */ + nn->client_tracking_ops = &nfsd4_umh_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + + /* + * See if the recoverydir exists and is a directory. If it is, + * then use the legacy ops. + */ + nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); + if (!status) { + status = S_ISDIR(path.dentry->d_inode->i_mode); + path_put(&path); + if (status) + goto do_init; + } + + /* Finally, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " + "removed in 3.10. Please transition to using " + "nfsdcltrack.\n"); +do_init: + status = nn->client_tracking_ops->init(net); + if (status) { + printk(KERN_WARNING "NFSD: Unable to initialize client " + "recovery tracking! (%d)\n", status); + nn->client_tracking_ops = NULL; + } + return status; +} + +void +nfsd4_client_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->client_tracking_ops) { + if (nn->client_tracking_ops->exit) + nn->client_tracking_ops->exit(net); + nn->client_tracking_ops = NULL; + } +} + +void +nfsd4_client_record_create(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->create(clp); +} + +void +nfsd4_client_record_remove(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->remove(clp); +} + +int +nfsd4_client_record_check(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + return nn->client_tracking_ops->check(clp); + + return -EOPNOTSUPP; +} + +void +nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) +{ + if (nn->client_tracking_ops) + nn->client_tracking_ops->grace_done(nn, boot_time); +} + +static int +rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + struct dentry *dentry; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + if (!cn) { + module_put(THIS_MODULE); + return 0; + } + + switch (event) { + case RPC_PIPEFS_MOUNT: + dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + break; + } + cn->cn_pipe->dentry = dentry; + break; + case RPC_PIPEFS_UMOUNT: + if (cn->cn_pipe->dentry) + nfsd4_cld_unregister_sb(cn->cn_pipe); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfsd4_cld_block = { + .notifier_call = rpc_pipefs_event, +}; + +int +register_cld_notifier(void) +{ + return rpc_pipefs_notifier_register(&nfsd4_cld_block); +} + +void +unregister_cld_notifier(void) +{ + rpc_pipefs_notifier_unregister(&nfsd4_cld_block); +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e8c98f00967..2204e1fe572 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -38,17 +38,18 @@ #include <linux/namei.h> #include <linux/swap.h> #include <linux/pagemap.h> +#include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/hash.h> #include "xdr4.h" +#include "xdr4cb.h" #include "vfs.h" +#include "current_stateid.h" -#define NFSDDBG_FACILITY NFSDDBG_PROC +#include "netns.h" -/* Globals */ -time_t nfsd4_lease = 90; /* default lease time */ -time_t nfsd4_grace = 90; -static time_t boot_time; +#define NFSDDBG_FACILITY NFSDDBG_PROC #define all_ones {{~0,~0},~0} static const stateid_t one_stateid = { @@ -58,11 +59,15 @@ static const stateid_t one_stateid = { static const stateid_t zero_stateid = { /* all fields zero */ }; +static const stateid_t currentstateid = { + .si_generation = 1, +}; static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) +#define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -77,13 +82,13 @@ static DEFINE_MUTEX(client_mutex); * effort to decrease the scope of the client_mutex, this spinlock may * eventually cover more: */ -static DEFINE_SPINLOCK(recall_lock); +static DEFINE_SPINLOCK(state_lock); -static struct kmem_cache *openowner_slab = NULL; -static struct kmem_cache *lockowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab; void nfs4_lock_state(void) @@ -91,12 +96,125 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } +static void free_session(struct nfsd4_session *); + +static bool is_session_dead(struct nfsd4_session *ses) +{ + return ses->se_flags & NFS4_SESSION_DEAD; +} + +void nfsd4_put_session(struct nfsd4_session *ses) +{ + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); +} + +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) +{ + if (atomic_read(&ses->se_ref) > ref_held_by_me) + return nfserr_jukebox; + ses->se_flags |= NFS4_SESSION_DEAD; + return nfs_ok; +} + +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) +{ + if (is_session_dead(ses)) + return nfserr_badsession; + atomic_inc(&ses->se_ref); + return nfs_ok; +} + void nfs4_unlock_state(void) { mutex_unlock(&client_mutex); } +static bool is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + clp->cl_time = 0; + return nfs_ok; +} + +static __be32 mark_client_expired(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + __be32 ret; + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); + return ret; +} + +static __be32 get_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) + return nfserr_expired; + atomic_inc(&clp->cl_refcount); + return nfs_ok; +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (is_client_expired(clp)) { + WARN_ON(1); + printk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &nn->client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static void put_client_renew_locked(struct nfs4_client *clp) +{ + if (!atomic_dec_and_test(&clp->cl_refcount)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); +} + +void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -110,8 +228,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static struct list_head del_recall_lru; - static void nfsd4_free_file(struct nfs4_file *f) { kmem_cache_free(file_slab, f); @@ -120,9 +236,9 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { - if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { - list_del(&fi->fi_hash); - spin_unlock(&recall_lock); + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { + hlist_del(&fi->fi_hash); + spin_unlock(&state_lock); iput(fi->fi_inode); nfsd4_free_file(fi); } @@ -135,7 +251,7 @@ get_nfs4_file(struct nfs4_file *fi) } static int num_delegations; -unsigned int max_delegations; +unsigned long max_delegations; /* * Open owner state (share locks) @@ -155,8 +271,6 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; - /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) @@ -167,11 +281,11 @@ static unsigned int file_hashval(struct inode *ino) return hash_ptr(ino, FILE_HASH_BITS); } -static struct list_head file_hashtbl[FILE_HASH_SIZE]; +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { - BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); atomic_inc(&fp->fi_access[oflag]); } @@ -196,13 +310,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } @@ -216,21 +324,27 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) __nfs4_file_put_access(fp, oflag); } -static inline int get_new_stid(struct nfs4_stid *stid) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - static int min_stateid = 0; - struct idr *stateids = &stid->sc_client->cl_stateids; - int new_stid; - int error; + struct idr *stateids = &cl->cl_stateids; + struct nfs4_stid *stid; + int new_id; + + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + if (new_id < 0) + goto out_free; + stid->sc_client = cl; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; + /* Will be incremented before return to client: */ + stid->sc_stateid.si_generation = 0; - error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); - /* - * Note: the necessary preallocation was done in - * nfs4_alloc_stateid(). The idr code caps the number of - * preallocations that can exist at a time, but the state lock - * prevents anyone from using ours before we get here: - */ - BUG_ON(error); /* * It shouldn't be a problem to reuse an opaque stateid value. * I don't think it is for 4.1. But with 4.0 I worry that, for @@ -240,69 +354,103 @@ static inline int get_new_stid(struct nfs4_stid *stid) * amount of time until an id is reused, by ensuring they always * "increase" (mod INT_MAX): */ - - min_stateid = new_stid+1; - if (min_stateid == INT_MAX) - min_stateid = 0; - return new_stid; + return stid; +out_free: + kmem_cache_free(slab, stid); + return NULL; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) { - stateid_t *s = &stid->sc_stateid; - int new_id; - - stid->sc_type = type; - stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; - /* Will be incremented before return to client: */ - s->si_generation = 0; + return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); } -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) +/* + * When we recall a delegation, we should be careful not to hand it + * out again straight away. + * To ensure this we keep a pair of bloom filters ('new' and 'old') + * in which the filehandles of recalled delegations are "stored". + * If a filehandle appear in either filter, a delegation is blocked. + * When a delegation is recalled, the filehandle is stored in the "new" + * filter. + * Every 30 seconds we swap the filters and clear the "new" one, + * unless both are empty of course. + * + * Each filter is 256 bits. We hash the filehandle to 32bit and use the + * low 3 bytes as hash-table indices. + * + * 'state_lock', which is always held when block_delegations() is called, + * is used to manage concurrent access. Testing does not need the lock + * except when swapping the two filters. + */ +static struct bloom_pair { + int entries, old_entries; + time_t swap_time; + int new; /* index into 'set' */ + DECLARE_BITMAP(set[2], 256); +} blocked_delegations; + +static int delegation_blocked(struct knfsd_fh *fh) { - struct idr *stateids = &cl->cl_stateids; + u32 hash; + struct bloom_pair *bd = &blocked_delegations; - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; - /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. - */ - return kmem_cache_alloc(slab, GFP_KERNEL); + if (bd->entries == 0) + return 0; + if (seconds_since_boot() - bd->swap_time > 30) { + spin_lock(&state_lock); + if (seconds_since_boot() - bd->swap_time > 30) { + bd->entries -= bd->old_entries; + bd->old_entries = bd->entries; + memset(bd->set[bd->new], 0, + sizeof(bd->set[0])); + bd->new = 1-bd->new; + bd->swap_time = seconds_since_boot(); + } + spin_unlock(&state_lock); + } + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + if (test_bit(hash&255, bd->set[0]) && + test_bit((hash>>8)&255, bd->set[0]) && + test_bit((hash>>16)&255, bd->set[0])) + return 1; + + if (test_bit(hash&255, bd->set[1]) && + test_bit((hash>>8)&255, bd->set[1]) && + test_bit((hash>>16)&255, bd->set[1])) + return 1; + + return 0; } -static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +static void block_delegations(struct knfsd_fh *fh) { - return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); + u32 hash; + struct bloom_pair *bd = &blocked_delegations; + + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + + __set_bit(hash&255, bd->set[bd->new]); + __set_bit((hash>>8)&255, bd->set[bd->new]); + __set_bit((hash>>16)&255, bd->set[bd->new]); + if (bd->entries == 0) + bd->swap_time = seconds_since_boot(); + bd->entries += 1; } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; - struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - /* - * Major work on the lease subsystem (for example, to support - * calbacks on stat) will be required before we can support - * write delegations properly. - */ - if (type != NFS4_OPEN_DELEGATE_READ) - return NULL; - if (fp->fi_had_conflict) - return NULL; if (num_delegations > max_delegations) return NULL; + if (delegation_blocked(¤t_fh->fh_handle)) + return NULL; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -313,29 +461,40 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - get_nfs4_file(fp); - dp->dl_file = fp; - dp->dl_type = type; + dp->dl_file = NULL; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&dp->dl_recall); return dp; } +static void remove_stid(struct nfs4_stid *s) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +} + +static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) +{ + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - dprintk("NFSD: freeing dp %p\n",dp); - put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; } } static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + if (!fp->fi_lease) + return; if (atomic_dec_and_test(&fp->fi_delegees)) { vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); fp->fi_lease = NULL; @@ -346,37 +505,68 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; + s->sc_type = 0; +} - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + lockdep_assert_held(&state_lock); + + dp->dl_stid.sc_type = NFS4_DELEG_STID; + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - unhash_stid(&dp->dl_stid); + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); - spin_lock(&recall_lock); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); - spin_unlock(&recall_lock); - nfs4_put_deleg_lease(dp->dl_file); + spin_unlock(&state_lock); + if (dp->dl_file) { + nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + } +} + + + +static void destroy_revoked_delegation(struct nfs4_delegation *dp) +{ + list_del_init(&dp->dl_recall_lru); + remove_stid(&dp->dl_stid); + nfs4_put_delegation(dp); +} + +static void destroy_delegation(struct nfs4_delegation *dp) +{ + unhash_delegation(dp); + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } +static void revoke_delegation(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (clp->cl_minorversion == 0) + destroy_delegation(dp); + else { + unhash_delegation(dp); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + } +} + /* * SETCLIENTID state */ -/* client_lock protects the client lru list and session hash table */ -static DEFINE_SPINLOCK(client_lock); - -/* Hash tables for nfs4_clientid state */ -#define CLIENT_HASH_BITS 4 -#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) -#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) - static unsigned int clientid_hashval(u32 id) { return id & CLIENT_HASH_MASK; @@ -388,31 +578,6 @@ static unsigned int clientstr_hashval(const char *name) } /* - * reclaim_str_hashtbl[] holds known client info from previous reset/reboot - * used in reboot/reset lease grace period processing - * - * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed - * setclientid_confirmed info. - * - * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed - * setclientid info. - * - * client_lru holds client queue ordered by nfs4_client.cl_time - * for lease renewal. - * - * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time - * for last close replay. - */ -static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; -static int reclaim_str_hashtbl_size = 0; -static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head client_lru; -static struct list_head close_lru; - -/* * We store the NONE, READ, WRITE, and BOTH bits separately in the * st_{access,deny}_bmap field of the stateid, in order to track not * only what share bits are currently in force, but also what @@ -430,37 +595,69 @@ static struct list_head close_lru; * * which we should reject. */ -static void -set_access(unsigned int *access, unsigned long bmap) { +static unsigned int +bmap_to_share_mode(unsigned long bmap) { int i; + unsigned int access = 0; - *access = 0; for (i = 1; i < 4; i++) { if (test_bit(i, &bmap)) - *access |= i; + access |= i; } + return access; } -static void -set_deny(unsigned int *deny, unsigned long bmap) { - int i; - - *deny = 0; - for (i = 0; i < 4; i++) { - if (test_bit(i, &bmap)) - *deny |= i ; - } -} - -static int +static bool test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { unsigned int access, deny; - set_access(&access, stp->st_access_bmap); - set_deny(&deny, stp->st_deny_bmap); + access = bmap_to_share_mode(stp->st_access_bmap); + deny = bmap_to_share_mode(stp->st_deny_bmap); if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return 0; - return 1; + return false; + return true; +} + +/* set share access for a given stateid */ +static inline void +set_access(u32 access, struct nfs4_ol_stateid *stp) +{ + __set_bit(access, &stp->st_access_bmap); +} + +/* clear share access for a given stateid */ +static inline void +clear_access(u32 access, struct nfs4_ol_stateid *stp) +{ + __clear_bit(access, &stp->st_access_bmap); +} + +/* test whether a given stateid has access */ +static inline bool +test_access(u32 access, struct nfs4_ol_stateid *stp) +{ + return test_bit(access, &stp->st_access_bmap); +} + +/* set share deny for a given stateid */ +static inline void +set_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + __set_bit(access, &stp->st_deny_bmap); +} + +/* clear share deny for a given stateid */ +static inline void +clear_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + __clear_bit(access, &stp->st_deny_bmap); +} + +/* test whether a given stateid is denying specific access */ +static inline bool +test_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + return test_bit(access, &stp->st_deny_bmap); } static int nfs4_access_to_omode(u32 access) @@ -473,7 +670,22 @@ static int nfs4_access_to_omode(u32 access) case NFS4_SHARE_ACCESS_BOTH: return O_RDWR; } - BUG(); + WARN_ON_ONCE(1); + return O_RDONLY; +} + +/* release all access and file references for a given stateid */ +static void +release_all_access(struct nfs4_ol_stateid *stp) +{ + int i; + + for (i = 1; i < 4; i++) { + if (test_access(i, stp)) + nfs4_file_put_access(stp->st_file, + nfs4_access_to_omode(i)); + clear_access(i, stp); + } } static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) @@ -484,23 +696,15 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) static void close_generic_stateid(struct nfs4_ol_stateid *stp) { - int i; - - if (stp->st_access_bmap) { - for (i = 1; i < 4; i++) { - if (test_bit(i, &stp->st_access_bmap)) - nfs4_file_put_access(stp->st_file, - nfs4_access_to_omode(i)); - __clear_bit(i, &stp->st_access_bmap); - } - } + release_all_access(stp); put_nfs4_file(stp->st_file); stp->st_file = NULL; } static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + remove_stid(&stp->st_stid); + nfs4_free_stid(stateid_slab, &stp->st_stid); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -530,6 +734,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) } } +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); @@ -558,7 +768,6 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp) static void release_open_stateid(struct nfs4_ol_stateid *stp) { unhash_open_stateid(stp); - unhash_stid(&stp->st_stid); free_generic_stateid(stp); } @@ -580,12 +789,17 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { - unhash_stid(&s->st_stid); free_generic_stateid(s); oo->oo_last_closed_stid = NULL; } } +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); @@ -594,9 +808,6 @@ static void release_openowner(struct nfs4_openowner *oo) nfs4_free_openowner(oo); } -#define SESSION_HASH_SIZE 512 -static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; - static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { @@ -605,12 +816,42 @@ hash_sessionid(struct nfs4_sessionid *sessionid) return sid->sequence % SESSION_HASH_SIZE; } +#ifdef NFSD_DEBUG static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { u32 *ptr = (u32 *)(&sessionid->data[0]); dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); } +#else +static inline void +dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) +{ +} +#endif + +/* + * Bump the seqid on cstate->replay_owner, and clear replay_owner if it + * won't be used for replay. + */ +void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (nfserr == nfserr_replay_me) + return; + + if (!seqid_mutating_err(ntohl(nfserr))) { + cstate->replay_owner = NULL; + return; + } + if (!so) + return; + if (so->so_is_open_owner) + release_last_closed_stateid(openowner(so)); + so->so_seqid++; + return; +} static void gen_sessionid(struct nfsd4_session *ses) @@ -651,17 +892,15 @@ free_session_slots(struct nfsd4_session *ses) * We don't actually need to cache the rpc and session headers, so we * can allocate a little less for each slot: */ -static inline int slot_bytes(struct nfsd4_channel_attrs *ca) -{ - return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; -} - -static int nfsd4_sanitize_slot_size(u32 size) +static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) { - size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ - size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); + u32 size; - return size; + if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) + size = 0; + else + size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + return size + sizeof(struct nfsd4_slot); } /* @@ -669,15 +908,15 @@ static int nfsd4_sanitize_slot_size(u32 size) * re-negotiate active sessions and reduce their slot usage to make * room for new connections. For now we just fail the create session. */ -static int nfsd4_get_drc_mem(int slotsize, u32 num) +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) { + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; int avail; - num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); - spin_lock(&nfsd_drc_lock); - avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, - nfsd_drc_max_mem - nfsd_drc_mem_used); + avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -685,15 +924,20 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num) return num; } -static void nfsd4_put_drc_mem(int slotsize, int num) +static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) { + int slotsize = slot_bytes(ca); + spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * num; + nfsd_drc_mem_used -= slotsize * ca->maxreqs; spin_unlock(&nfsd_drc_lock); } -static struct nfsd4_session *alloc_session(int slotsize, int numslots) +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, + struct nfsd4_channel_attrs *battrs) { + int numslots = fattrs->maxreqs; + int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; int mem, i; @@ -706,11 +950,14 @@ static struct nfsd4_session *alloc_session(int slotsize, int numslots) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ for (i = 0; i < numslots; i++) { - mem = sizeof(struct nfsd4_slot) + slotsize; - new->se_slots[i] = kzalloc(mem, GFP_KERNEL); + new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); if (!new->se_slots[i]) goto out_free; } + + memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); + return new; out_free: while (i--) @@ -719,18 +966,6 @@ out_free: return NULL; } -static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) -{ - u32 maxrpc = nfsd_serv->sv_max_mesg; - - new->maxreqs = numslots; - new->maxresp_cached = min_t(u32, req->maxresp_cached, - slotsize + NFSD_MIN_HDR_SEQ_SZ); - new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); - new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); - new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); -} - static void free_conn(struct nfsd4_conn *c) { svc_xprt_put(c->cn_xprt); @@ -747,8 +982,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) list_del(&c->cn_persession); free_conn(c); } - spin_unlock(&clp->cl_lock); nfsd4_probe_callback(clp); + spin_unlock(&clp->cl_lock); } static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) @@ -786,30 +1021,28 @@ static int nfsd4_register_conn(struct nfsd4_conn *conn) return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); } -static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir) +static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses) { - struct nfsd4_conn *conn; int ret; - conn = alloc_conn(rqstp, dir); - if (!conn) - return nfserr_jukebox; nfsd4_hash_conn(conn, ses); ret = nfsd4_register_conn(conn); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); - return nfs_ok; + if (conn->cn_flags & NFS4_CDFC4_BACK) { + /* callback channel may be back up */ + nfsd4_probe_callback(ses->se_client); + } } -static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses) +static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) { u32 dir = NFS4_CDFC4_FORE; - if (ses->se_flags & SESSION4_BACK_CHAN) + if (cses->flags & SESSION4_BACK_CHAN) dir |= NFS4_CDFC4_BACK; - - return nfsd4_new_conn(rqstp, ses, dir); + return alloc_conn(rqstp, dir); } /* must be called under client_lock */ @@ -832,47 +1065,26 @@ static void nfsd4_del_conns(struct nfsd4_session *s) spin_unlock(&clp->cl_lock); } -void free_session(struct kref *kref) +static void __free_session(struct nfsd4_session *ses) { - struct nfsd4_session *ses; - int mem; - - ses = container_of(kref, struct nfsd4_session, se_ref); - nfsd4_del_conns(ses); - spin_lock(&nfsd_drc_lock); - mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); - nfsd_drc_mem_used -= mem; - spin_unlock(&nfsd_drc_lock); free_session_slots(ses); kfree(ses); } -static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) +static void free_session(struct nfsd4_session *ses) { - struct nfsd4_session *new; - struct nfsd4_channel_attrs *fchan = &cses->fore_channel; - int numslots, slotsize; - int status; - int idx; + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - /* - * Note decreasing slot size below client's request may - * make it difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. - */ - slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); - numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); - if (numslots < 1) - return NULL; + lockdep_assert_held(&nn->client_lock); + nfsd4_del_conns(ses); + nfsd4_put_drc_mem(&ses->se_fchannel); + __free_session(ses); +} - new = alloc_session(slotsize, numslots); - if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); - return NULL; - } - init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); +static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) +{ + int idx; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); new->se_client = clp; gen_sessionid(new); @@ -882,21 +1094,16 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n new->se_cb_seq_nr = 1; new->se_flags = cses->flags; new->se_cb_prog = cses->callback_prog; - kref_init(&new->se_ref); + new->se_cb_sec = cses->cb_sec; + atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&client_lock); - list_add(&new->se_hash, &sessionid_hashtbl[idx]); + spin_lock(&nn->client_lock); + list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); - status = nfsd4_new_conn_from_crses(rqstp, new); - /* whoops: benny points out, status is ignored! (err, or bogus) */ - if (status) { - free_session(&new->se_ref); - return NULL; - } if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); /* @@ -909,21 +1116,20 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); } - nfsd4_probe_callback(clp); - return new; } /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ - list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { + list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { if (!memcmp(elem->se_sessionid.data, sessionid->data, NFS4_MAX_SESSIONID_LEN)) { return elem; @@ -944,41 +1150,14 @@ unhash_session(struct nfsd4_session *ses) spin_unlock(&ses->se_client->cl_lock); } -/* must be called under the client_lock */ -static inline void -renew_client_locked(struct nfs4_client *clp) -{ - if (is_client_expired(clp)) { - dprintk("%s: client (clientid %08x/%08x) already expired\n", - __func__, - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - return; - } - - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &client_lru); - clp->cl_time = get_seconds(); -} - -static inline void -renew_client(struct nfs4_client *clp) -{ - spin_lock(&client_lock); - renew_client_locked(clp); - spin_unlock(&client_lock); -} - /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int -STALE_CLIENTID(clientid_t *clid) +STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) { - if (clid->cl_boot == boot_time) + if (clid->cl_boot == nn->boot_time) return 0; dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", - clid->cl_boot, clid->cl_id, boot_time); + clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -1000,48 +1179,48 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } -static inline void +static void free_client(struct nfs4_client *clp) { + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); list_del(&ses->se_perclnt); - nfsd4_put_session(ses); + WARN_ON_ONCE(atomic_read(&ses->se_ref)); + free_session(ses); } - if (clp->cl_cred.cr_group_info) - put_group_info(clp->cl_cred.cr_group_info); - kfree(clp->cl_principal); + rpc_destroy_wait_queue(&clp->cl_cb_waitq); + free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } -void -release_session_client(struct nfsd4_session *session) -{ - struct nfs4_client *clp = session->se_client; - - if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock)) - return; - if (is_client_expired(clp)) { - free_client(clp); - session->se_client = NULL; - } else - renew_client_locked(clp); - spin_unlock(&client_lock); -} - /* must be called under the client_lock */ static inline void unhash_client_locked(struct nfs4_client *clp) { struct nfsd4_session *ses; - mark_client_expired(clp); list_del(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) @@ -1050,23 +1229,29 @@ unhash_client_locked(struct nfs4_client *clp) } static void -expire_client(struct nfs4_client *clp) +destroy_client(struct nfs4_client *clp) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); + } + list_splice_init(&clp->cl_revoked, &reaplist); + while (!list_empty(&reaplist)) { + dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + destroy_revoked_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); @@ -1076,12 +1261,21 @@ expire_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); list_del(&clp->cl_idhash); - list_del(&clp->cl_strhash); - spin_lock(&client_lock); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + spin_lock(&nn->client_lock); unhash_client_locked(clp); - if (atomic_read(&clp->cl_refcount) == 0) - free_client(clp); - spin_unlock(&client_lock); + WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); + free_client(clp); + spin_unlock(&nn->client_lock); +} + +static void expire_client(struct nfs4_client *clp) +{ + nfsd4_client_record_remove(clp); + destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -1096,12 +1290,35 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -static void copy_cred(struct svc_cred *target, struct svc_cred *source) +static int copy_cred(struct svc_cred *target, struct svc_cred *source) { + if (source->cr_principal) { + target->cr_principal = + kstrdup(source->cr_principal, GFP_KERNEL); + if (target->cr_principal == NULL) + return -ENOMEM; + } else + target->cr_principal = NULL; + target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); + return 0; +} + +static long long +compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) +{ + long long res; + + res = o1->len - o2->len; + if (res) + return res; + return (long long)memcmp(o1->data, o2->data, o1->len); } static int same_name(const char *n1, const char *n2) @@ -1121,34 +1338,102 @@ same_clid(clientid_t *cl1, clientid_t *cl2) return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); } -/* XXX what about NGROUP */ -static int +static bool groups_equal(struct group_info *g1, struct group_info *g2) +{ + int i; + + if (g1->ngroups != g2->ngroups) + return false; + for (i=0; i<g1->ngroups; i++) + if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i))) + return false; + return true; +} + +/* + * RFC 3530 language requires clid_inuse be returned when the + * "principal" associated with a requests differs from that previously + * used. We use uid, gid's, and gss principal string as our best + * approximation. We also don't want to allow non-gss use of a client + * established using gss: in theory cr_principal should catch that + * change, but in practice cr_principal can be null even in the gss case + * since gssd doesn't always pass down a principal string. + */ +static bool is_gss_cred(struct svc_cred *cr) +{ + /* Is cr_flavor one of the gss "pseudoflavors"?: */ + return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR); +} + + +static bool same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { - return cr1->cr_uid == cr2->cr_uid; + if ((is_gss_cred(cr1) != is_gss_cred(cr2)) + || (!uid_eq(cr1->cr_uid, cr2->cr_uid)) + || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) + || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) + return false; + if (cr1->cr_principal == cr2->cr_principal) + return true; + if (!cr1->cr_principal || !cr2->cr_principal) + return false; + return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } -static void gen_clid(struct nfs4_client *clp) +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + if (!cr->cr_gss_mech) + return false; + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); +} + +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; - clp->cl_clientid.cl_boot = boot_time; + clp->cl_clientid.cl_boot = nn->boot_time; clp->cl_clientid.cl_id = current_clientid++; } static void gen_confirm(struct nfs4_client *clp) { + __be32 verf[2]; static u32 i; - u32 *p; - p = (u32 *)clp->cl_confirm.data; - *p++ = get_seconds(); - *p++ = i++; + verf[0] = (__be32)get_seconds(); + verf[1] = (__be32)i++; + memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -1163,60 +1448,87 @@ static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t return NULL; } -static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, +static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { struct nfs4_client *clp; struct sockaddr *sa = svc_addr(rqstp); - char *princ; + int ret; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); - - princ = svc_gss_principal(rqstp); - if (princ) { - clp->cl_principal = kstrdup(princ, GFP_KERNEL); - if (clp->cl_principal == NULL) { - free_client(clp); - return NULL; - } + ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); + if (ret) { + spin_lock(&nn->client_lock); + free_client(clp); + spin_unlock(&nn->client_lock); + return NULL; } - - idr_init(&clp->cl_stateids); - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - spin_lock_init(&clp->cl_lock); - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); + nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); - clp->cl_flavor = rqstp->rq_flavor; - copy_cred(&clp->cl_cred, &rqstp->rq_cred); gen_confirm(clp); clp->cl_cb_session = NULL; + clp->net = net; return clp; } static void -add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct nfs4_client *clp; + + while (*new) { + clp = rb_entry(*new, struct nfs4_client, cl_namenode); + parent = *new; + + if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&new_clp->cl_namenode, parent, new); + rb_insert_color(&new_clp->cl_namenode, root); +} + +static struct nfs4_client * +find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) +{ + long long cmp; + struct rb_node *node = root->rb_node; + struct nfs4_client *clp; + + while (node) { + clp = rb_entry(node, struct nfs4_client, cl_namenode); + cmp = compare_blob(&clp->cl_name, name); + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + return clp; + } + return NULL; +} + +static void +add_to_unconfirmed(struct nfs4_client *clp) { unsigned int idhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); - list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); + list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); renew_client(clp); } @@ -1224,23 +1536,26 @@ static void move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); - unsigned int strhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); - list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); - strhashval = clientstr_hashval(clp->cl_recdir); - list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + add_clp_to_name_tree(clp, &nn->conf_name_tree); + set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); renew_client(clp); } static struct nfs4_client * -find_confirmed_client(clientid_t *clid) +find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { + list_for_each_entry(clp, &tbl[idhashval], cl_idhash) { if (same_clid(&clp->cl_clientid, clid)) { + if ((bool)clp->cl_minorversion != sessions) + return NULL; renew_client(clp); return clp; } @@ -1249,16 +1564,19 @@ find_confirmed_client(clientid_t *clid) } static struct nfs4_client * -find_unconfirmed_client(clientid_t *clid) +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - struct nfs4_client *clp; - unsigned int idhashval = clientid_hashval(clid->cl_id); + struct list_head *tbl = nn->conf_id_hashtbl; - list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) - return clp; - } - return NULL; + return find_client_in_id_table(tbl, clid, sessions); +} + +static struct nfs4_client * +find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) +{ + struct list_head *tbl = nn->unconf_id_hashtbl; + + return find_client_in_id_table(tbl, clid, sessions); } static bool clp_used_exchangeid(struct nfs4_client *clp) @@ -1267,27 +1585,15 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) } static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval) +find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * -find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) +find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; - - list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_clp_in_name_tree(name, &nn->unconf_name_tree); } static void @@ -1308,7 +1614,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); @@ -1334,11 +1640,12 @@ out_err: } /* - * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. + * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { + struct xdr_buf *buf = resp->xdr.buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -1347,15 +1654,14 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; if (nfsd4_not_cached(resp)) { slot->sl_datalen = 0; return; } - slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap; - base = (char *)resp->cstate.datap - - (char *)resp->xbuf->head[0].iov_base; - if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data, - slot->sl_datalen)) + base = resp->cstate.data_offset; + slot->sl_datalen = buf->len - base; + if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) WARN("%s: sessions DRC could not cache compound\n", __func__); return; } @@ -1374,15 +1680,12 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, struct nfsd4_op *op; struct nfsd4_slot *slot = resp->cstate.slot; - dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, - resp->opcnt, resp->cstate.slot->sl_cachethis); - /* Encode the replayed sequence operation */ op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && slot->sl_cachethis == 0) { + if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); @@ -1394,28 +1697,31 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, * The sequence operation is not cached because we can use the slot and * session values. */ -__be32 +static __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; __be32 status; dprintk("--> %s slot %p\n", __func__, slot); - /* Either returns 0 or nfserr_retry_uncached */ status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); - if (status == nfserr_retry_uncached_rep) + if (status) return status; - /* The sequence operation has been encoded, cstate->datap set. */ - memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen); + p = xdr_reserve_space(xdr, slot->sl_datalen); + if (!p) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); + xdr_commit_encode(xdr); resp->opcnt = slot->sl_opcnt; - resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen); - status = slot->sl_status; - - return status; + return slot->sl_status; } /* @@ -1434,18 +1740,31 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) clid->flags = new->cl_exchange_flags; } +static bool client_has_state(struct nfs4_client *clp) +{ + /* + * Note clp->cl_openowners check isn't quite right: there's no + * need to count owners without stateid's. + * + * Also note we should probably be using this in 4.0 case too. + */ + return !list_empty(&clp->cl_openowners) + || !list_empty(&clp->cl_delegations) + || !list_empty(&clp->cl_sessions); +} + __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { struct nfs4_client *unconf, *conf, *new; - int status; - unsigned int strhashval; - char dname[HEXDIR_LEN]; + __be32 status; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; struct sockaddr *sa = svc_addr(rqstp); + bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " @@ -1456,99 +1775,90 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; - /* Currently only support SP4_NONE */ switch (exid->spa_how) { + case SP4_MACH_CRED: + if (!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; case SP4_NONE: break; + default: /* checked by xdr code */ + WARN_ON_ONCE(1); case SP4_SSV: - return nfserr_serverfault; - default: - BUG(); /* checked by xdr code */ - case SP4_MACH_CRED: - return nfserr_serverfault; /* no excuse :-/ */ + return nfserr_encr_alg_unsupp; } - status = nfs4_make_rec_clidname(dname, &exid->clname); - - if (status) - goto error; - - strhashval = clientstr_hashval(dname); - + /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); - status = nfs_ok; - - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { - if (!clp_used_exchangeid(conf)) { - status = nfserr_clid_inuse; /* XXX: ? */ - goto out; - } - if (!same_verf(&verf, &conf->cl_verifier)) { - /* 18.35.4 case 8 */ - if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); + bool verfs_match = same_verf(&verf, &conf->cl_verifier); + + if (update) { + if (!clp_used_exchangeid(conf)) { /* buggy client */ + status = nfserr_inval; + goto out; + } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } + if (!creds_match) { /* case 9 */ + status = nfserr_perm; + goto out; + } + if (!verfs_match) { /* case 8 */ status = nfserr_not_same; goto out; } - /* Client reboot: destroy old state */ - expire_client(conf); - goto out_new; + /* case 6 */ + exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + new = conf; + goto out_copy; } - if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - /* 18.35.4 case 9 */ - if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { - status = nfserr_perm; + if (!creds_match) { /* case 3 */ + if (client_has_state(conf)) { + status = nfserr_clid_inuse; goto out; } expire_client(conf); goto out_new; } - /* - * Set bit when the owner id and verifier map to an already - * confirmed client id (18.35.3). - */ - exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - - /* - * Falling into 18.35.4 case 2, possible router replay. - * Leave confirmed record intact and return same result. - */ - copy_verf(conf, &verf); - new = conf; - goto out_copy; + if (verfs_match) { /* case 2 */ + conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + new = conf; + goto out_copy; + } + /* case 5, client reboot */ + goto out_new; } - /* 18.35.4 case 7 */ - if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + if (update) { /* case 7 */ status = nfserr_noent; goto out; } - unconf = find_unconfirmed_client_by_str(dname, strhashval); - if (unconf) { - /* - * Possible retry or client restart. Per 18.35.4 case 4, - * a new unconfirmed record should be generated regardless - * of whether any properties have changed. - */ + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); + if (unconf) /* case 4, possible retry or client restart */ expire_client(unconf); - } + /* case 1 (normal case) */ out_new: - /* Normal case */ - new = create_client(exid->clname, dname, rqstp, &verf); + new = create_client(exid->clname, rqstp, &verf); if (new == NULL) { status = nfserr_jukebox; goto out; } + new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); - gen_clid(new); - add_to_unconfirmed(new, strhashval); + gen_clid(new, nn); + add_to_unconfirmed(new); out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - exid->seqid = 1; + exid->seqid = new->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", @@ -1557,12 +1867,10 @@ out_copy: out: nfs4_unlock_state(); -error: - dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); return status; } -static int +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) { dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, @@ -1575,16 +1883,11 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) else return nfserr_seq_misordered; } - /* Normal */ + /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; - /* Replay */ if (seqid == slot_seqid) return nfserr_replay_cache; - /* Wraparound */ - if (seqid == 1 && (slot_seqid + 1) == 0) - return nfs_ok; - /* Misordered replay or misordered new request */ return nfserr_seq_misordered; } @@ -1595,7 +1898,7 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) */ static void nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, - struct nfsd4_clid_slot *slot, int nfserr) + struct nfsd4_clid_slot *slot, __be32 nfserr) { slot->sl_status = nfserr; memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); @@ -1626,10 +1929,78 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, /* seqid, slotID, slotID, slotID, status */ \ 5 ) * sizeof(__be32)) -static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +{ + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; + + if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) + return nfserr_toosmall; + ca->headerpadsz = 0; + ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); + ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); + ca->maxresp_cached = min_t(u32, ca->maxresp_cached, + NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); + ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); + /* + * Note decreasing slot size below client's request may make it + * difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. Clients that + * request larger slots than they need will get poor results: + */ + ca->maxreqs = nfsd4_get_drc_mem(ca); + if (!ca->maxreqs) + return nfserr_jukebox; + + return nfs_ok; +} + +#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ + RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) +#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ + RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + +static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) { - return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ - || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; + ca->headerpadsz = 0; + + /* + * These RPC_MAX_HEADER macros are overkill, especially since we + * don't even do gss on the backchannel yet. But this is still + * less than 1k. Tighten up this estimate in the unlikely event + * it turns out to be a problem for some client: + */ + if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) + return nfserr_toosmall; + ca->maxresp_cached = 0; + if (ca->maxops < 2) + return nfserr_toosmall; + + return nfs_ok; +} + +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) +{ + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. + * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } } __be32 @@ -1640,98 +2011,105 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_session *new; + struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; - bool confirm_me = false; - int status = 0; + __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; + status = check_forechannel_attrs(&cr_ses->fore_channel, nn); + if (status) + return status; + status = check_backchannel_attrs(&cr_ses->back_channel); + if (status) + goto out_release_drc_mem; + status = nfserr_jukebox; + new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); + if (!new) + goto out_release_drc_mem; + conn = alloc_conn_from_crses(rqstp, cr_ses); + if (!conn) + goto out_free_session; nfs4_lock_state(); - unconf = find_unconfirmed_client(&cr_ses->clientid); - conf = find_confirmed_client(&cr_ses->clientid); + unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); + conf = find_confirmed_client(&cr_ses->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { - dprintk("Got a create_session replay! seqid= %d\n", - cs_slot->sl_seqid); - /* Return the cached reply status */ status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out; + goto out_free_conn; } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; - dprintk("Sequence misordered!\n"); - dprintk("Expected seqid= %d but got seqid= %d\n", - cs_slot->sl_seqid, cr_ses->seqid); - goto out; + goto out_free_conn; } } else if (unconf) { + struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; - goto out; + goto out_free_conn; } - + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_free_conn; } - - confirm_me = true; + old = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (old) { + status = mark_client_expired(old); + if (status) + goto out_free_conn; + expire_client(old); + } + move_to_confirmed(unconf); conf = unconf; } else { status = nfserr_stale_clientid; - goto out; + goto out_free_conn; } - - /* - * XXX: we should probably set this at creation time, and check - * for consistent minorversion use throughout: - */ - conf->cl_minorversion = 1; + status = nfs_ok; /* * We do not support RDMA or persistent sessions */ cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; - status = nfserr_toosmall; - if (check_forechannel_attrs(cr_ses->fore_channel)) - goto out; + init_session(rqstp, new, conf, cr_ses); + nfsd4_init_conn(rqstp, conn, new); - status = nfserr_jukebox; - new = alloc_init_session(rqstp, conf, cr_ses); - if (!new) - goto out; - status = nfs_ok; memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); - memcpy(&cr_ses->fore_channel, &new->se_fchannel, - sizeof(struct nfsd4_channel_attrs)); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); - if (confirm_me) - move_to_confirmed(conf); -out: nfs4_unlock_state(); - dprintk("%s returns %d\n", __func__, ntohl(status)); return status; -} - -static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) -{ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct nfsd4_compoundargs *argp = rqstp->rq_argp; - - return argp->opcnt == resp->opcnt; +out_free_conn: + nfs4_unlock_state(); + free_conn(conn); +out_free_session: + __free_session(new); +out_release_drc_mem: + nfsd4_put_drc_mem(&cr_ses->fore_channel); + return status; } static __be32 nfsd4_map_bcts_dir(u32 *dir) @@ -1748,29 +2126,57 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) return nfserr_inval; } +__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc) +{ + struct nfsd4_session *session = cstate->session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; + spin_lock(&nn->client_lock); + session->se_cb_prog = bc->bc_cb_program; + session->se_cb_sec = bc->bc_cb_sec; + spin_unlock(&nn->client_lock); + + nfsd4_probe_callback(session->se_client); + + return nfs_ok; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_bind_conn_to_session *bcts) { __be32 status; + struct nfsd4_conn *conn; + struct nfsd4_session *session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; - spin_lock(&client_lock); - cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); - /* Sorta weird: we only need the refcnt'ing because new_conn acquires - * client_lock iself: */ - if (cstate->session) { - nfsd4_get_session(cstate->session); - atomic_inc(&cstate->session->se_client->cl_refcount); - } - spin_unlock(&client_lock); - if (!cstate->session) - return nfserr_badsession; - + nfs4_lock_state(); + spin_lock(&nn->client_lock); + session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); + spin_unlock(&nn->client_lock); + status = nfserr_badsession; + if (!session) + goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); - if (!status) - nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + if (status) + goto out; + conn = alloc_conn(rqstp, bcts->dir); + status = nfserr_jukebox; + if (!conn) + goto out; + nfsd4_init_conn(rqstp, conn, session); + status = nfs_ok; +out: + nfs4_unlock_state(); return status; } @@ -1787,40 +2193,43 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_destroy_session *sessionid) { struct nfsd4_session *ses; - u32 status = nfserr_badsession; - - /* Notes: - * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid - * - Should we return nfserr_back_chan_busy if waiting for - * callbacks on to-be-destroyed session? - * - Do we need to clear any callback info from previous session? - */ + __be32 status; + int ref_held_by_me = 0; + struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); + nfs4_lock_state(); + status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) - return nfserr_not_only_op; + goto out; + ref_held_by_me++; } dump_sessionid(__func__, &sessionid->sessionid); - spin_lock(&client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid); - if (!ses) { - spin_unlock(&client_lock); - goto out; - } - + spin_lock(&nn->client_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); + status = nfserr_badsession; + if (!ses) + goto out_client_lock; + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) + goto out_client_lock; + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); + if (status) + goto out_put_session; unhash_session(ses); - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); - nfs4_lock_state(); nfsd4_probe_callback_sync(ses->se_client); - nfs4_unlock_state(); - - nfsd4_del_conns(ses); - nfsd4_put_session(ses); + spin_lock(&nn->client_lock); status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); +out_client_lock: + spin_unlock(&nn->client_lock); out: - dprintk("%s returns %d\n", __func__, ntohl(status)); + nfs4_unlock_state(); return status; } @@ -1836,26 +2245,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s return NULL; } -static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; + __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); - if (c) { - spin_unlock(&clp->cl_lock); - free_conn(new); - return; - } + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); - return; + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; } static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) @@ -1879,10 +2293,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_sequence *seq) { struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_session *session; + struct nfs4_client *clp; struct nfsd4_slot *slot; struct nfsd4_conn *conn; - int status; + __be32 status; + int buflen; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -1895,23 +2313,30 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!conn) return nfserr_jukebox; - spin_lock(&client_lock); + spin_lock(&nn->client_lock); status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid); + session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); if (!session) - goto out; + goto out_no_session; + clp = session->se_client; + status = get_client_locked(clp); + if (status) + goto out_no_session; + status = nfsd4_get_session_locked(session); + if (status) + goto out_put_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) - goto out; + goto out_put_session; status = nfserr_req_too_big; if (nfsd4_request_too_big(rqstp, session)) - goto out; + goto out_put_session; status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) - goto out; + goto out_put_session; slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); @@ -1921,8 +2346,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, * sr_highest_slotid and the sr_target_slot id to maxslots */ seq->maxslots = session->se_fchannel.maxreqs; - status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, + slot->sl_flags & NFSD4_SLOT_INUSE); if (status == nfserr_replay_cache) { + status = nfserr_seq_misordered; + if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) + goto out_put_session; cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status @@ -1932,70 +2361,75 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; } if (status) - goto out; + goto out_put_session; - nfsd4_sequence_check_conn(conn, session); + status = nfsd4_sequence_check_conn(conn, session); conn = NULL; + if (status) + goto out_put_session; + buflen = (seq->cachethis) ? + session->se_fchannel.maxresp_cached : + session->se_fchannel.maxresp_sz; + status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : + nfserr_rep_too_big; + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) + goto out_put_session; + svc_reserve(rqstp, buflen); + + status = nfs_ok; /* Success! bump slot seqid */ - slot->sl_inuse = true; slot->sl_seqid = seq->seqid; - slot->sl_cachethis = seq->cachethis; + slot->sl_flags |= NFSD4_SLOT_INUSE; + if (seq->cachethis) + slot->sl_flags |= NFSD4_SLOT_CACHETHIS; + else + slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS; cstate->slot = slot; cstate->session = session; out: - /* Hold a session reference until done processing the compound. */ - if (cstate->session) { - struct nfs4_client *clp = session->se_client; - - nfsd4_get_session(cstate->session); - atomic_inc(&clp->cl_refcount); - switch (clp->cl_cb_state) { - case NFSD4_CB_DOWN: - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; - break; - case NFSD4_CB_FAULT: - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; - break; - default: - seq->status_flags = 0; - } + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; } - kfree(conn); - spin_unlock(&client_lock); - dprintk("%s: return %d\n", __func__, ntohl(status)); + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; +out_no_session: + if (conn) + free_conn(conn); + spin_unlock(&nn->client_lock); return status; -} - -static inline bool has_resources(struct nfs4_client *clp) -{ - return !list_empty(&clp->cl_openowners) - || !list_empty(&clp->cl_delegations) - || !list_empty(&clp->cl_sessions); +out_put_session: + nfsd4_put_session(session); +out_put_client: + put_client_renew_locked(clp); + goto out_no_session; } __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { struct nfs4_client *conf, *unconf, *clp; - int status = 0; + __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); - unconf = find_unconfirmed_client(&dc->clientid); - conf = find_confirmed_client(&dc->clientid); + unconf = find_unconfirmed_client(&dc->clientid, true, nn); + conf = find_confirmed_client(&dc->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); if (conf) { clp = conf; - if (!is_client_expired(conf) && has_resources(conf)) { - status = nfserr_clientid_busy; - goto out; - } - - /* rfc5661 18.50.3 */ - if (cstate->session && conf == cstate->session->se_client) { + if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } @@ -2005,18 +2439,20 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_stale_clientid; goto out; } - + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } expire_client(clp); out: nfs4_unlock_state(); - dprintk("%s return %d\n", __func__, ntohl(status)); return status; } __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { - int status = 0; + __be32 status = 0; if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) @@ -2030,7 +2466,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta nfs4_lock_state(); status = nfserr_complete_already; - if (cstate->session->se_client->cl_firststate) + if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags)) goto out; status = nfserr_stale_clientid; @@ -2045,7 +2482,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta goto out; status = nfs_ok; - nfsd4_create_clid_dir(cstate->session->se_client); + nfsd4_client_record_create(cstate->session->se_client); out: nfs4_unlock_state(); return status; @@ -2057,26 +2494,15 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; - char dname[HEXDIR_LEN]; - - status = nfs4_make_rec_clidname(dname, &clname); - if (status) - return status; - - /* - * XXX The Duplicate Request Cache (DRC) has been checked (??) - * We get here on a DRC miss. - */ - - strhashval = clientstr_hashval(dname); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); + conf = find_confirmed_client_by_name(&clname, nn); if (conf) { - /* RFC 3530 14.2.33 CASE 0: */ + /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) goto out; @@ -2089,70 +2515,21 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } } - /* - * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") - * has a description of SETCLIENTID request processing consisting - * of 5 bullet points, labeled as CASE0 - CASE4 below. - */ - unconf = find_unconfirmed_client_by_str(dname, strhashval); + unconf = find_unconfirmed_client_by_name(&clname, nn); + if (unconf) + expire_client(unconf); status = nfserr_jukebox; - if (!conf) { - /* - * RFC 3530 14.2.33 CASE 4: - * placed first, because it is the normal case - */ - if (unconf) - expire_client(unconf); - new = create_client(clname, dname, rqstp, &clverifier); - if (new == NULL) - goto out; - gen_clid(new); - } else if (same_verf(&conf->cl_verifier, &clverifier)) { - /* - * RFC 3530 14.2.33 CASE 1: - * probable callback update - */ - if (unconf) { - /* Note this is removing unconfirmed {*x***}, - * which is stronger than RFC recommended {vxc**}. - * This has the advantage that there is at most - * one {*x***} in either list at any time. - */ - expire_client(unconf); - } - new = create_client(clname, dname, rqstp, &clverifier); - if (new == NULL) - goto out; + new = create_client(clname, rqstp, &clverifier); + if (new == NULL) + goto out; + if (conf && same_verf(&conf->cl_verifier, &clverifier)) + /* case 1: probable callback update */ copy_clid(new, conf); - } else if (!unconf) { - /* - * RFC 3530 14.2.33 CASE 2: - * probable client reboot; state will be removed if - * confirmed. - */ - new = create_client(clname, dname, rqstp, &clverifier); - if (new == NULL) - goto out; - gen_clid(new); - } else { - /* - * RFC 3530 14.2.33 CASE 3: - * probable client reboot; state will be removed if - * confirmed. - */ - expire_client(unconf); - new = create_client(clname, dname, rqstp, &clverifier); - if (new == NULL) - goto out; - gen_clid(new); - } - /* - * XXX: we should probably set this at creation time, and check - * for consistent minorversion use throughout: - */ + else /* case 4 (new client) or cases 2, 3 (client reboot): */ + gen_clid(new, nn); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); - add_to_unconfirmed(new, strhashval); + add_to_unconfirmed(new); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); @@ -2163,102 +2540,57 @@ out: } -/* - * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has - * a description of SETCLIENTID_CONFIRM request processing consisting of 4 - * bullets, labeled as CASE1 - CASE4 below. - */ __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if (STALE_CLIENTID(clid)) + if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - /* - * XXX The Duplicate Request Cache (DRC) has been checked (??) - * We get here on a DRC miss. - */ - nfs4_lock_state(); - conf = find_confirmed_client(clid); - unconf = find_unconfirmed_client(clid); - - status = nfserr_clid_inuse; - if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) - goto out; - if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) - goto out; - + conf = find_confirmed_client(clid, false, nn); + unconf = find_unconfirmed_client(clid, false, nn); /* - * section 14.2.34 of RFC 3530 has a description of - * SETCLIENTID_CONFIRM request processing consisting - * of 4 bullet points, labeled as CASE1 - CASE4 below. + * We try hard to give out unique clientid's, so if we get an + * attempt to confirm the same clientid with a different cred, + * there's a bug somewhere. Let's charitably assume it's our + * bug. */ - if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { - /* - * RFC 3530 14.2.34 CASE 1: - * callback update - */ - if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) - status = nfserr_clid_inuse; - else { - nfsd4_change_callback(conf, &unconf->cl_cb_conn); - nfsd4_probe_callback(conf); - expire_client(unconf); - status = nfs_ok; - - } - } else if (conf && !unconf) { - /* - * RFC 3530 14.2.34 CASE 2: - * probable retransmitted request; play it safe and - * do nothing. - */ - if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) - status = nfserr_clid_inuse; - else - status = nfs_ok; - } else if (!conf && unconf - && same_verf(&unconf->cl_confirm, &confirm)) { - /* - * RFC 3530 14.2.34 CASE 3: - * Normal case; new or rebooted client: - */ - if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { - status = nfserr_clid_inuse; - } else { - unsigned int hash = - clientstr_hashval(unconf->cl_recdir); - conf = find_confirmed_client_by_str(unconf->cl_recdir, - hash); - if (conf) { - nfsd4_remove_clid_dir(conf); - expire_client(conf); - } - move_to_confirmed(unconf); - conf = unconf; - nfsd4_probe_callback(conf); + status = nfserr_serverfault; + if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) + goto out; + if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) + goto out; + /* cases below refer to rfc 3530 section 14.2.34: */ + if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { + if (conf && !unconf) /* case 2: probable retransmit */ status = nfs_ok; + else /* case 4: client hasn't noticed we rebooted yet? */ + status = nfserr_stale_clientid; + goto out; + } + status = nfs_ok; + if (conf) { /* case 1: callback update */ + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); + expire_client(unconf); + } else { /* case 3: normal case; new or rebooted client */ + conf = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (conf) { + status = mark_client_expired(conf); + if (status) + goto out; + expire_client(conf); } - } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) - && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, - &confirm)))) { - /* - * RFC 3530 14.2.34 CASE 4: - * Client probably hasn't noticed that we rebooted yet. - */ - status = nfserr_stale_clientid; - } else { - /* check that we have hit one of the cases...*/ - status = nfserr_clid_inuse; + move_to_confirmed(unconf); + nfsd4_probe_callback(unconf); } out: nfs4_unlock_state(); @@ -2276,7 +2608,6 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) unsigned int hashval = file_hashval(ino); atomic_set(&fp->fi_ref, 1); - INIT_LIST_HEAD(&fp->fi_hash); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); fp->fi_inode = igrab(ino); @@ -2284,28 +2615,19 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&recall_lock); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); -} - -static void -nfsd4_free_slab(struct kmem_cache **slab) -{ - if (*slab == NULL) - return; - kmem_cache_destroy(*slab); - *slab = NULL; + spin_lock(&state_lock); + hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&state_lock); } void nfsd4_free_slabs(void) { - nfsd4_free_slab(&openowner_slab); - nfsd4_free_slab(&lockowner_slab); - nfsd4_free_slab(&file_slab); - nfsd4_free_slab(&stateid_slab); - nfsd4_free_slab(&deleg_slab); + kmem_cache_destroy(openowner_slab); + kmem_cache_destroy(lockowner_slab); + kmem_cache_destroy(file_slab); + kmem_cache_destroy(stateid_slab); + kmem_cache_destroy(deleg_slab); } int @@ -2314,42 +2636,38 @@ nfsd4_init_slabs(void) openowner_slab = kmem_cache_create("nfsd4_openowners", sizeof(struct nfs4_openowner), 0, 0, NULL); if (openowner_slab == NULL) - goto out_nomem; + goto out; lockowner_slab = kmem_cache_create("nfsd4_lockowners", - sizeof(struct nfs4_openowner), 0, 0, NULL); + sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) - goto out_nomem; + goto out_free_openowner_slab; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) - goto out_nomem; + goto out_free_lockowner_slab; stateid_slab = kmem_cache_create("nfsd4_stateids", sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) - goto out_nomem; + goto out_free_file_slab; deleg_slab = kmem_cache_create("nfsd4_delegations", sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) - goto out_nomem; + goto out_free_stateid_slab; return 0; -out_nomem: - nfsd4_free_slabs(); + +out_free_stateid_slab: + kmem_cache_destroy(stateid_slab); +out_free_file_slab: + kmem_cache_destroy(file_slab); +out_free_lockowner_slab: + kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: + kmem_cache_destroy(openowner_slab); +out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } -void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); -} - -void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); -} - static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -2380,7 +2698,9 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } @@ -2403,9 +2723,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -2414,17 +2733,19 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - __set_bit(open->op_share_access, &stp->st_access_bmap); - __set_bit(open->op_share_deny, &stp->st_deny_bmap); + set_access(open->op_share_access, stp); + set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; } static void -move_to_close_lru(struct nfs4_openowner *oo) +move_to_close_lru(struct nfs4_openowner *oo, struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); - list_move_tail(&oo->oo_close_lru, &close_lru); + list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); } @@ -2438,16 +2759,21 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, } static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) { struct nfs4_stateowner *so; struct nfs4_openowner *oo; + struct nfs4_client *clp; - list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (!so->so_is_open_owner) continue; if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { oo = openowner(so); + clp = oo->oo_owner.so_client; + if ((bool)clp->cl_minorversion != sessions) + return NULL; renew_client(oo->oo_owner.so_client); return oo; } @@ -2462,15 +2788,15 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - spin_lock(&recall_lock); - list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + spin_lock(&state_lock); + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return fp; } } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return NULL; } @@ -2486,16 +2812,14 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_ol_stateid *stp; __be32 ret; - dprintk("NFSD: nfs4_share_conflict\n"); - fp = find_file(ino); if (!fp) return nfs_ok; ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_bit(deny_type, &stp->st_deny_bmap) || - test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + if (test_deny(deny_type, stp) || + test_deny(NFS4_SHARE_DENY_BOTH, stp)) goto out; } ret = nfs_ok; @@ -2506,6 +2830,10 @@ out: static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&state_lock); /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel @@ -2513,23 +2841,30 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - /* only place dl_time is set. protected by lock_flocks*/ + /* Only place dl_time is set; protected by i_lock: */ dp->dl_time = get_seconds(); + block_delegations(&dp->dl_fh); + nfsd4_cb_recall(dp); } -/* Called from break_lease() with lock_flocks() held. */ +/* Called from break_lease() with i_lock held. */ static void nfsd_break_deleg_cb(struct file_lock *fl) { struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_delegation *dp; - BUG_ON(!fp); - /* We assume break_lease is only called once per lease: */ - BUG_ON(fp->fi_had_conflict); + if (!fp) { + WARN(1, "(%p)->fl_owner NULL\n", fl); + return; + } + if (fp->fi_had_conflict) { + WARN(1, "duplicate break on %p\n", fp); + return; + } /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -2537,11 +2872,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&recall_lock); + spin_lock(&state_lock); fp->fi_had_conflict = true; list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); - spin_unlock(&recall_lock); + spin_unlock(&state_lock); } static @@ -2571,7 +2906,7 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, - struct nfsd4_open *open) + struct nfsd4_open *open, struct nfsd_net *nn) { clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; @@ -2579,7 +2914,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfs4_openowner *oo = NULL; __be32 status; - if (STALE_CLIENTID(&open->op_clientid)) + if (STALE_CLIENTID(&open->op_clientid, nn)) return nfserr_stale_clientid; /* * In case we need it later, after we've already created the @@ -2590,10 +2925,11 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, return nfserr_jukebox; strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open); + oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid); + clp = find_confirmed_client(clientid, cstate->minorversion, + nn); if (clp == NULL) return nfserr_expired; goto new_owner; @@ -2633,8 +2969,6 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) static int share_access_to_flags(u32 share_access) { - share_access &= ~NFS4_SHARE_WANT_MASK; - return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; } @@ -2655,7 +2989,7 @@ static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) } static __be32 -nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, +nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; @@ -2697,11 +3031,6 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st return nfs_ok; } -static void nfs4_free_stateid(struct nfs4_ol_stateid *s) -{ - kmem_cache_free(stateid_slab, s); -} - static inline int nfs4_access_to_access(u32 nfs4_access) { int flags = 0; @@ -2753,7 +3082,7 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c bool new_access; __be32 status; - new_access = !test_bit(op_share_access, &stp->st_access_bmap); + new_access = !test_access(op_share_access, stp); if (new_access) { status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); if (status) @@ -2768,18 +3097,17 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c return status; } /* remember the open */ - __set_bit(op_share_access, &stp->st_access_bmap); - __set_bit(open->op_share_deny, &stp->st_deny_bmap); + set_access(op_share_access, stp); + set_deny(open->op_share_deny, stp); return nfs_ok; } static void -nfs4_set_claim_prev(struct nfsd4_open *open) +nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) { open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - open->op_openowner->oo_owner.so_client->cl_firststate = 1; } /* Should we give out recallable state?: */ @@ -2804,7 +3132,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return NULL; locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_LEASE; + fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -2812,91 +3140,124 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } -static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, flag); + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); - if (status) { - list_del_init(&dp->dl_perclnt); - locks_free_lock(fl); - return -ENOMEM; - } + if (status) + goto out_free; fp->fi_lease = fl; - fp->fi_deleg_file = fl->fl_file; - get_file(fp->fi_deleg_file); + fp->fi_deleg_file = get_file(fl->fl_file); atomic_set(&fp->fi_delegees, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_lock(&state_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); return 0; +out_free: + locks_free_lock(fl); + return status; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { - struct nfs4_file *fp = dp->dl_file; - + if (fp->fi_had_conflict) + return -EAGAIN; + get_nfs4_file(fp); + dp->dl_file = fp; if (!fp->fi_lease) - return nfs4_setlease(dp, flag); - spin_lock(&recall_lock); + return nfs4_setlease(dp); + spin_lock(&state_lock); + atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&recall_lock); + spin_unlock(&state_lock); return -EAGAIN; } - atomic_inc(&fp->fi_delegees); - list_add(&dp->dl_perfile, &fp->fi_delegations); - spin_unlock(&recall_lock); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); return 0; } +static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) +{ + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (status == -EAGAIN) + open->op_why_no_deleg = WND4_CONTENTION; + else { + open->op_why_no_deleg = WND4_RESOURCE; + switch (open->op_deleg_want) { + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + break; + case NFS4_SHARE_WANT_CANCEL: + open->op_why_no_deleg = WND4_CANCELLED; + break; + case NFS4_SHARE_WANT_NO_DELEG: + WARN_ON_ONCE(1); + } + } +} + /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct net *net, struct svc_fh *fh, + struct nfsd4_open *open, struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status, flag = 0; + int status = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ - if (locks_in_grace()) - goto out; + case NFS4_OPEN_CLAIM_FH: + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ + if (locks_in_grace(net)) + goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) - goto out; + goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } - - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp, flag); + status = nfs4_set_delegation(dp, stp->st_file); if (status) goto out_free; @@ -2904,18 +3265,40 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_ dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); -out: - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS - && flag == NFS4_OPEN_DELEGATE_NONE - && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - open->op_delegate_type = flag; + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - nfs4_put_delegation(dp); + destroy_delegation(dp); out_no_deleg: - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; + } + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; +} + +static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, + struct nfs4_delegation *dp) +{ + if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } + /* Otherwise the client must be confused wanting a delegation + * it already has, therefore we don't return + * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + */ } /* @@ -2941,7 +3324,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; - status = nfs4_check_deleg(cl, fp, open, &dp); + status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; } else { @@ -2967,36 +3350,46 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); if (status) goto out; + status = nfsd4_truncate(rqstp, current_fh, open); + if (status) + goto out; stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); - status = nfsd4_truncate(rqstp, current_fh, open); - if (status) { - release_open_stateid(stp); - goto out; - } } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(&resp->cstate)) + if (nfsd4_has_session(&resp->cstate)) { open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_WANTED; + goto nodeleg; + } + } + /* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(current_fh, open, stp); - + nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); +nodeleg: status = nfs_ok; dprintk("%s: stateid=" STATEID_FMT "\n", __func__, STATEID_VAL(&stp->st_stid.sc_stateid)); out: + /* 4.1 client trying to upgrade/downgrade delegation? */ + if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + open->op_deleg_want) + nfsd4_deleg_xgrade_none_ext(open, dp); + if (fp) put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - nfs4_set_claim_prev(open); + nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate)); /* * To finish the open response, we just need to set the rflags. */ @@ -3026,7 +3419,19 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) if (open->op_file) nfsd4_free_file(open->op_file); if (open->op_stp) - nfs4_free_stateid(open->op_stp); + free_generic_stateid(open->op_stp); +} + +static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) +{ + struct nfs4_client *found; + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + found = find_confirmed_client(clid, session, nn); + if (clp) + *clp = found; + return found ? nfs_ok : nfserr_expired; } __be32 @@ -3035,20 +3440,14 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_client *clp; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) - goto out; - clp = find_confirmed_client(clid); - status = nfserr_expired; - if (clp == NULL) { - /* We assume the client took too long to RENEW. */ - dprintk("nfsd4_renew: clientid not found!\n"); + status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + if (status) goto out; - } status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) @@ -3059,110 +3458,108 @@ out: return status; } -static struct lock_manager nfsd4_manager = { -}; - static void -nfsd4_end_grace(void) +nfsd4_end_grace(struct nfsd_net *nn) { + /* do nothing if grace period already ended */ + if (nn->grace_ended) + return; + dprintk("NFSD: end of grace period\n"); - nfsd4_recdir_purge_old(); - locks_end_grace(&nfsd4_manager); + nn->grace_ended = true; + nfsd4_record_grace_done(nn, nn->boot_time); + locks_end_grace(&nn->nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and * to see the (possibly new, possibly shorter) lease time, we * can safely set the next grace time to the current lease time: */ - nfsd4_grace = nfsd4_lease; + nn->nfsd4_grace = nn->nfsd4_lease; } static time_t -nfs4_laundromat(void) +nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nfsd4_lease; - time_t t, clientid_val = nfsd4_lease; - time_t u, test_val = nfsd4_lease; + time_t cutoff = get_seconds() - nn->nfsd4_lease; + time_t t, new_timeo = nn->nfsd4_lease; nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - if (locks_in_grace()) - nfsd4_end_grace(); + nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); - spin_lock(&client_lock); - list_for_each_safe(pos, next, &client_lru) { + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (clientid_val > t) - clientid_val = t; + new_timeo = min(new_timeo, t); break; } - if (atomic_read(&clp->cl_refcount)) { + if (mark_client_expired_locked(clp)) { dprintk("NFSD: client in use (clientid %08x)\n", clp->cl_clientid.cl_id); continue; } - unhash_client_locked(clp); - list_add(&clp->cl_lru, &reaplist); + list_move(&clp->cl_lru, &reaplist); } - spin_unlock(&client_lock); + spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); - nfsd4_remove_clid_dir(clp); expire_client(clp); } - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) + continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { - u = dp->dl_time - cutoff; - if (test_val > u) - test_val = u; + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; } list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + revoke_delegation(dp); } - test_val = nfsd4_lease; - list_for_each_safe(pos, next, &close_lru) { + list_for_each_safe(pos, next, &nn->close_lru) { oo = container_of(pos, struct nfs4_openowner, oo_close_lru); if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { - u = oo->oo_time - cutoff; - if (test_val > u) - test_val = u; + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; } release_openowner(oo); } - if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) - clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); - return clientid_val; + return new_timeo; } static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); static void -laundromat_main(struct work_struct *not_used) +laundromat_main(struct work_struct *laundry) { time_t t; + struct delayed_work *dwork = container_of(laundry, struct delayed_work, + work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + laundromat_work); - t = nfs4_laundromat(); + t = nfs4_laundromat(nn); dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); - queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); + queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) @@ -3172,29 +3569,19 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s return nfs_ok; } -static int -STALE_STATEID(stateid_t *stateid) -{ - if (stateid->si_opaque.so_clid.cl_boot == boot_time) - return 0; - dprintk("NFSD: stale stateid " STATEID_FMT "!\n", - STATEID_VAL(stateid)); - return 1; -} - static inline int -access_permit_read(unsigned long access_bmap) +access_permit_read(struct nfs4_ol_stateid *stp) { - return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); + return test_access(NFS4_SHARE_ACCESS_READ, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp) || + test_access(NFS4_SHARE_ACCESS_WRITE, stp); } static inline int -access_permit_write(unsigned long access_bmap) +access_permit_write(struct nfs4_ol_stateid *stp) { - return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); + return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp); } static @@ -3205,9 +3592,9 @@ __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) /* For lock stateid's, we test the parent open, not the lock: */ if (stp->st_openstp) stp = stp->st_openstp; - if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) + if ((flags & WR_STATE) && !access_permit_write(stp)) goto out; - if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) + if ((flags & RD_STATE) && !access_permit_read(stp)) goto out; status = nfs_ok; out: @@ -3215,11 +3602,11 @@ out: } static inline __be32 -check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) +check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags) { if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (locks_in_grace()) { + else if (locks_in_grace(net)) { /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -3236,18 +3623,18 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) * that are not able to provide mandatory locking. */ static inline int -grace_disallows_io(struct inode *inode) +grace_disallows_io(struct net *net, struct inode *inode) { - return locks_in_grace() && mandatory_lock(inode); + return locks_in_grace(net) && mandatory_lock(inode); } /* Returns true iff a is later than b: */ static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return (s32)a->si_generation - (s32)b->si_generation > 0; + return (s32)(a->si_generation - b->si_generation) > 0; } -static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) +static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* * When sessions are used the stateid generation number is ignored @@ -3275,53 +3662,78 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_sess return nfserr_old_stateid; } -__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) +static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; struct nfs4_ol_stateid *ols; __be32 status; - if (STALE_STATEID(stateid)) - return nfserr_stale_stateid; - + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + /* Client debugging aid. */ + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, + sizeof(addr_str)); + pr_warn_ratelimited("NFSD: client %s testing state ID " + "with incorrect client ID\n", addr_str); + return nfserr_bad_stateid; + } s = find_stateid(cl, stateid); if (!s) - return nfserr_stale_stateid; + return nfserr_bad_stateid; status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) return status; - if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) + switch (s->sc_type) { + case NFS4_DELEG_STID: + return nfs_ok; + case NFS4_REVOKED_DELEG_STID: + return nfserr_deleg_revoked; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ols = openlockstateid(s); + if (ols->st_stateowner->so_is_open_owner + && !(openowner(ols->st_stateowner)->oo_flags + & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; return nfs_ok; - ols = openlockstateid(s); - if (ols->st_stateowner->so_is_open_owner - && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + default: + printk("unknown stateid type %x\n", s->sc_type); + case NFS4_CLOSED_STID: return nfserr_bad_stateid; - return nfs_ok; + } } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, bool sessions, + struct nfsd_net *nn) { struct nfs4_client *cl; + __be32 status; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - if (STALE_STATEID(stateid)) + status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, + nn, &cl); + if (status == nfserr_stale_clientid) { + if (sessions) + return nfserr_bad_stateid; return nfserr_stale_stateid; - cl = find_confirmed_client(&stateid->si_opaque.so_clid); - if (!cl) - return nfserr_expired; + } + if (status) + return status; *s = find_stateid_by_type(cl, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; - } /* * Checks for stateid operations */ __be32 -nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, +nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { struct nfs4_stid *s; @@ -3329,20 +3741,25 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp = NULL; struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct file *file = NULL; __be32 status; if (filpp) *filpp = NULL; - if (grace_disallows_io(ino)) + if (grace_disallows_io(net, ino)) return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(current_fh, stateid, flags); + return check_special_stateids(net, current_fh, stateid, flags); + + nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, cstate->minorversion, nn); if (status) - return status; + goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3353,8 +3770,12 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - *filpp = dp->dl_file->fi_deleg_file; - BUG_ON(!*filpp); + file = dp->dl_file->fi_deleg_file; + if (!file) { + WARN_ON_ONCE(1); + status = nfserr_serverfault; + goto out; + } } break; case NFS4_OPEN_STID: @@ -3371,25 +3792,36 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, goto out; if (filpp) { if (flags & RD_STATE) - *filpp = find_readable_file(stp->st_file); + file = find_readable_file(stp->st_file); else - *filpp = find_writeable_file(stp->st_file); + file = find_writeable_file(stp->st_file); } break; default: - return nfserr_bad_stateid; + status = nfserr_bad_stateid; + goto out; } status = nfs_ok; + if (file) + *filpp = get_file(file); out: + nfs4_unlock_state(); return status; } static __be32 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { - if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - release_lock_stateid(stp); + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + release_lockowner(lo); return nfs_ok; } @@ -3400,7 +3832,15 @@ __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_test_stateid *test_stateid) { - /* real work is done during encoding */ + struct nfsd4_test_stateid_id *stateid; + struct nfs4_client *cl = cstate->session->se_client; + + nfs4_lock_state(); + list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) + stateid->ts_id_status = + nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); + nfs4_unlock_state(); + return nfs_ok; } @@ -3410,6 +3850,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; + struct nfs4_delegation *dp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; @@ -3431,6 +3872,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, else ret = nfserr_locks_held; break; + case NFS4_REVOKED_DELEG_STID: + dp = delegstateid(s); + destroy_revoked_delegation(dp); + ret = nfs_ok; + break; default: ret = nfserr_bad_stateid; } @@ -3455,10 +3901,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ status = nfsd4_check_seqid(cstate, sop, seqid); if (status) return status; - if (stp->st_stid.sc_type == NFS4_CLOSED_STID) + if (stp->st_stid.sc_type == NFS4_CLOSED_STID + || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) /* * "Closed" stateid's exist *only* to return - * nfserr_replay_me from the previous step. + * nfserr_replay_me from the previous step, and + * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); @@ -3473,7 +3921,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, char typemask, - struct nfs4_ol_stateid **stpp) + struct nfs4_ol_stateid **stpp, + struct nfsd_net *nn) { __be32 status; struct nfs4_stid *s; @@ -3482,22 +3931,25 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s); + status = nfsd4_lookup_stateid(stateid, typemask, &s, + cstate->minorversion, nn); if (status) return status; *stpp = openlockstateid(s); - cstate->replay_owner = (*stpp)->st_stateowner; + if (!nfsd4_has_session(cstate)) + cstate->replay_owner = (*stpp)->st_stateowner; return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); } -static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) { __be32 status; struct nfs4_openowner *oo; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp); + NFS4_OPEN_STID, stpp, nn); if (status) return status; oo = openowner((*stpp)->st_stateowner); @@ -3513,10 +3965,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_confirm on file %pd\n", + cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -3526,7 +3978,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, - NFS4_OPEN_STID, &stp); + NFS4_OPEN_STID, &stp, nn); if (status) goto out; oo = openowner(stp->st_stateowner); @@ -3539,9 +3991,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - nfsd4_create_clid_dir(oo->oo_owner.so_client); + nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; @@ -3549,10 +4002,10 @@ out: static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) { - if (!test_bit(access, &stp->st_access_bmap)) + if (!test_access(access, stp)) return; nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); - __clear_bit(access, &stp->st_access_bmap); + clear_access(access, stp); } static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) @@ -3569,17 +4022,17 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac case NFS4_SHARE_ACCESS_BOTH: break; default: - BUG(); + WARN_ON_ONCE(1); } } static void -reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) +reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) { int i; for (i = 0; i < 4; i++) { if ((i & deny) != i) - __clear_bit(i, bmap); + clear_deny(i, stp); } } @@ -3590,63 +4043,46 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, { __be32 status; struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", + cstate->current_fh.fh_dentry); /* We don't yet support WANT bits: */ - od->od_share_access &= NFS4_SHARE_ACCESS_MASK; + if (od->od_deleg_want) + dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, + od->od_deleg_want); nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, - &od->od_stateid, &stp); + &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; - if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { - dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", + if (!test_access(od->od_share_access, stp)) { + dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto out; } - if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { + if (!test_deny(od->od_share_deny, stp)) { dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto out; } nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); + reset_union_bmap_deny(od->od_share_deny, stp); update_stateid(&stp->st_stid.sc_stateid); memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); return status; } -void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) -{ - struct nfs4_openowner *oo; - struct nfs4_ol_stateid *s; - - if (!so->so_is_open_owner) - return; - oo = openowner(so); - s = oo->oo_last_closed_stid; - if (!s) - return; - if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { - /* Release the last_closed_stid on the next seqid bump: */ - oo->oo_flags |= NFS4_OO_PURGE_CLOSE; - return; - } - oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; - release_last_closed_stateid(oo); -} - static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { unhash_open_stateid(s); @@ -3663,32 +4099,42 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_close on file %pd\n", + cstate->current_fh.fh_dentry); nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, - &stp); + &stp, nn); + nfsd4_bump_seqid(cstate, status); if (status) goto out; oo = openowner(stp->st_stateowner); - status = nfs_ok; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - oo->oo_last_closed_stid = stp; - /* place unused nfs4_stateowners on so_close_lru list to be - * released by the laundromat service after the lease period - * to enable us to handle CLOSE replay - */ - if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo); + if (cstate->minorversion) + free_generic_stateid(stp); + else + oo->oo_last_closed_stid = stp; + + if (list_empty(&oo->oo_owner.so_stateids)) { + if (cstate->minorversion) + release_openowner(oo); + else { + /* + * In the 4.0 case we need to keep the owners around a + * little while to handle CLOSE replay. + */ + move_to_close_lru(oo, SVC_NET(rqstp)); + } + } out: if (!cstate->replay_owner) nfs4_unlock_state(); @@ -3702,15 +4148,15 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_delegation *dp; stateid_t *stateid = &dr->dr_stateid; struct nfs4_stid *s; - struct inode *inode; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - inode = cstate->current_fh.fh_dentry->d_inode; nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, + cstate->minorversion, nn); if (status) goto out; dp = delegstateid(s); @@ -3718,7 +4164,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - unhash_delegation(dp); + destroy_delegation(dp); out: nfs4_unlock_state(); @@ -3728,8 +4174,6 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) #define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) static inline u64 @@ -3747,7 +4191,7 @@ last_byte_offset(u64 start, u64 len) { u64 end; - BUG_ON(!len); + WARN_ON_ONCE(!len); end = start + len; return end > start ? end - 1: NFS4_MAX_UINT64; } @@ -3759,8 +4203,6 @@ static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct & LOCKOWNER_INO_HASH_MASK; } -static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE]; - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -3819,6 +4261,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c if (!same_owner_str(&lo->lo_owner, owner, clid)) return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } lst = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); return lst->st_file->fi_inode == inode; @@ -3826,12 +4272,12 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner) + struct xdr_netobj *owner, struct nfsd_net *nn) { unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { if (same_lockowner_ino(lo, inode, clid, owner)) return lo; } @@ -3843,9 +4289,10 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s struct inode *inode = open_stp->st_file->fi_inode; unsigned int inohash = lockowner_ino_hashval(inode, clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]); + list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } @@ -3882,7 +4329,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; @@ -3906,21 +4353,23 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) struct nfs4_file *fp = lock_stp->st_file; int oflag = nfs4_access_to_omode(access); - if (test_bit(access, &lock_stp->st_access_bmap)) + if (test_access(access, lock_stp)) return; nfs4_file_get_access(fp, oflag); - __set_bit(access, &lock_stp->st_access_bmap); + set_access(access, lock_stp); } -__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) { struct nfs4_file *fi = ost->st_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; struct nfs4_lockowner *lo; unsigned int strhashval; + struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner); + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, + &lock->v.new.owner, nn); if (lo) { if (!cstate->minorversion) return nfserr_bad_seqid; @@ -3953,14 +4402,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; struct nfs4_ol_stateid *lock_stp; - struct nfs4_file *fp; struct file *filp = NULL; - struct file_lock file_lock; - struct file_lock conflock; + struct file_lock *file_lock = NULL; + struct file_lock *conflock = NULL; __be32 status = 0; bool new_state = false; int lkflg; int err; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, @@ -3978,11 +4428,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); if (lock->lk_is_new) { - /* - * Client indicates that this is a new lockowner. - * Use open owner and open stateid to create lock owner and - * lock stateid. - */ struct nfs4_ol_stateid *open_stp = NULL; if (nfsd4_has_session(cstate)) @@ -3992,14 +4437,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, sizeof(clientid_t)); status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid, nn)) goto out; /* validate and update open stateid and open seqid */ status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - &open_stp); + &open_stp, nn); if (status) goto out; open_sop = openowner(open_stp->st_stateowner); @@ -4009,19 +4454,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new_state); - if (status) - goto out; - } else { - /* lock (lock owner + lock stateid) already exists */ + } else status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, - NFS4_LOCK_STID, &lock_stp); - if (status) - goto out; - } + NFS4_LOCK_STID, &lock_stp, nn); + if (status) + goto out; lock_sop = lockowner(lock_stp->st_stateowner); - fp = lock_stp->st_file; lkflg = setlkflg(lock->lk_type); status = nfs4_check_openmode(lock_stp, lkflg); @@ -4029,27 +4469,34 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = nfserr_grace; - if (locks_in_grace() && !lock->lk_reclaim) + if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!locks_in_grace() && lock->lk_reclaim) + if (!locks_in_grace(net) && lock->lk_reclaim) goto out; - locks_init_lock(&file_lock); + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } + + locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: filp = find_readable_file(lock_stp->st_file); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); - file_lock.fl_type = F_RDLCK; + file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: filp = find_writeable_file(lock_stp->st_file); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); - file_lock.fl_type = F_WRLCK; + file_lock->fl_type = F_WRLCK; break; default: status = nfserr_inval; @@ -4059,22 +4506,23 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_openmode; goto out; } - file_lock.fl_owner = (fl_owner_t)lock_sop; - file_lock.fl_pid = current->tgid; - file_lock.fl_file = filp; - file_lock.fl_flags = FL_POSIX; - file_lock.fl_lmops = &nfsd_posix_mng_ops; - - file_lock.fl_start = lock->lk_offset; - file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); - nfs4_transform_lock_offset(&file_lock); - - /* - * Try to lock the file in the VFS. - * Note: locks.c uses the BKL to protect the inode's lock list. - */ + file_lock->fl_owner = (fl_owner_t)lock_sop; + file_lock->fl_pid = current->tgid; + file_lock->fl_file = filp; + file_lock->fl_flags = FL_POSIX; + file_lock->fl_lmops = &nfsd_posix_mng_ops; + file_lock->fl_start = lock->lk_offset; + file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); + nfs4_transform_lock_offset(file_lock); + + conflock = locks_alloc_lock(); + if (!conflock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } - err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); + err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ update_stateid(&lock_stp->st_stid.sc_stateid); @@ -4085,7 +4533,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case (EAGAIN): /* conflock holds conflicting lock */ status = nfserr_denied; dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); - nfs4_set_lock_denied(&conflock, &lock->lk_denied); + nfs4_set_lock_denied(conflock, &lock->lk_denied); break; case (EDEADLK): status = nfserr_deadlock; @@ -4098,8 +4546,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (status && new_state) release_lockowner(lock_sop); + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); + if (conflock) + locks_free_lock(conflock); return status; } @@ -4109,16 +4562,14 @@ out: * vfs_test_lock. (Arguably perhaps test_lock should be done with an * inode operation.) */ -static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) +static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct file *file; - int err; - - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); - if (err) - return err; - err = vfs_test_lock(file, lock); - nfsd_close(file); + __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + if (!err) { + err = nfserrno(vfs_test_lock(file, lock)); + nfsd_close(file); + } return err; } @@ -4130,12 +4581,12 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { struct inode *inode; - struct file_lock file_lock; + struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo; - int error; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if (locks_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) @@ -4143,23 +4594,31 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - status = nfserr_stale_clientid; - if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) - goto out; + if (!nfsd4_has_session(cstate)) { + status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + if (status) + goto out; + } if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; inode = cstate->current_fh.fh_dentry->d_inode; - locks_init_lock(&file_lock); + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } + locks_init_lock(file_lock); switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: - file_lock.fl_type = F_RDLCK; + file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - file_lock.fl_type = F_WRLCK; + file_lock->fl_type = F_WRLCK; break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); @@ -4167,29 +4626,29 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); if (lo) - file_lock.fl_owner = (fl_owner_t)lo; - file_lock.fl_pid = current->tgid; - file_lock.fl_flags = FL_POSIX; + file_lock->fl_owner = (fl_owner_t)lo; + file_lock->fl_pid = current->tgid; + file_lock->fl_flags = FL_POSIX; - file_lock.fl_start = lockt->lt_offset; - file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); + file_lock->fl_start = lockt->lt_offset; + file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); - nfs4_transform_lock_offset(&file_lock); + nfs4_transform_lock_offset(file_lock); - status = nfs_ok; - error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock); - if (error) { - status = nfserrno(error); + status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock); + if (status) goto out; - } - if (file_lock.fl_type != F_UNLCK) { + + if (file_lock->fl_type != F_UNLCK) { status = nfserr_denied; - nfs4_set_lock_denied(&file_lock, &lockt->lt_denied); + nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); return status; } @@ -4199,10 +4658,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_ol_stateid *stp; struct file *filp = NULL; - struct file_lock file_lock; + struct file_lock *file_lock = NULL; __be32 status; int err; - + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); @@ -4213,7 +4673,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, - &locku->lu_stateid, NFS4_LOCK_STID, &stp); + &locku->lu_stateid, NFS4_LOCK_STID, + &stp, nn); if (status) goto out; filp = find_any_file(stp->st_file); @@ -4221,36 +4682,39 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_lock_range; goto out; } - BUG_ON(!filp); - locks_init_lock(&file_lock); - file_lock.fl_type = F_UNLCK; - file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); - file_lock.fl_pid = current->tgid; - file_lock.fl_file = filp; - file_lock.fl_flags = FL_POSIX; - file_lock.fl_lmops = &nfsd_posix_mng_ops; - file_lock.fl_start = locku->lu_offset; - - file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length); - nfs4_transform_lock_offset(&file_lock); - - /* - * Try to unlock the file in the VFS. - */ - err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL); + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } + locks_init_lock(file_lock); + file_lock->fl_type = F_UNLCK; + file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); + file_lock->fl_pid = current->tgid; + file_lock->fl_file = filp; + file_lock->fl_flags = FL_POSIX; + file_lock->fl_lmops = &nfsd_posix_mng_ops; + file_lock->fl_start = locku->lu_offset; + + file_lock->fl_end = last_byte_offset(locku->lu_offset, + locku->lu_length); + nfs4_transform_lock_offset(file_lock); + + err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - /* - * OK, unlock succeeded; the only thing left to do is update the stateid. - */ update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); out: + nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); return status; out_nfserr: @@ -4270,7 +4734,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) struct inode *inode = filp->fi_inode; int status = 0; - lock_flocks(); + spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { status = 1; @@ -4278,7 +4742,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) } } out: - unlock_flocks(); + spin_unlock(&inode->i_lock); return status; } @@ -4295,22 +4759,21 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct list_head matches; unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - /* XXX check for lease expiration */ - - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) - return status; - nfs4_lock_state(); + status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + if (status) + goto out; + status = nfserr_locks_held; INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (sop->so_is_open_owner) continue; if (!same_owner_str(sop, owner, clid)) @@ -4346,78 +4809,74 @@ alloc_reclaim(void) return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } -int -nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) +bool +nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) { - unsigned int strhashval = clientstr_hashval(name); - struct nfs4_client *clp; + struct nfs4_client_reclaim *crp; - clp = find_confirmed_client_by_str(name, strhashval); - return clp ? 1 : 0; + crp = nfsd4_find_reclaim_client(name, nn); + return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... */ -int -nfs4_client_to_reclaim(const char *name) +struct nfs4_client_reclaim * +nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client_reclaim *crp = NULL; + struct nfs4_client_reclaim *crp; dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); crp = alloc_reclaim(); - if (!crp) - return 0; - strhashval = clientstr_hashval(name); - INIT_LIST_HEAD(&crp->cr_strhash); - list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); - reclaim_str_hashtbl_size++; - return 1; + if (crp) { + strhashval = clientstr_hashval(name); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_clp = NULL; + nn->reclaim_str_hashtbl_size++; + } + return crp; } -static void -nfs4_release_reclaim(void) +void +nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) +{ + list_del(&crp->cr_strhash); + kfree(crp); + nn->reclaim_str_hashtbl_size--; +} + +void +nfs4_release_reclaim(struct nfsd_net *nn) { struct nfs4_client_reclaim *crp = NULL; int i; for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&reclaim_str_hashtbl[i])) { - crp = list_entry(reclaim_str_hashtbl[i].next, + while (!list_empty(&nn->reclaim_str_hashtbl[i])) { + crp = list_entry(nn->reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); - list_del(&crp->cr_strhash); - kfree(crp); - reclaim_str_hashtbl_size--; + nfs4_remove_reclaim_record(crp, nn); } } - BUG_ON(reclaim_str_hashtbl_size); + WARN_ON_ONCE(nn->reclaim_str_hashtbl_size); } /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ -static struct nfs4_client_reclaim * -nfs4_find_reclaim_client(clientid_t *clid) +struct nfs4_client_reclaim * +nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client *clp; struct nfs4_client_reclaim *crp = NULL; + dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); - /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid); - if (clp == NULL) - return NULL; - - dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", - clp->cl_name.len, clp->cl_name.data, - clp->cl_recdir); - - /* find clp->cl_name in reclaim_str_hashtbl */ - strhashval = clientstr_hashval(clp->cl_recdir); - list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, clp->cl_recdir)) { + strhashval = clientstr_hashval(recdir); + list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { + if (same_name(crp->cr_recdir, recdir)) { return crp; } } @@ -4428,168 +4887,198 @@ nfs4_find_reclaim_client(clientid_t *clid) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid) +nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; + struct nfs4_client *clp; + + /* find clientid in conf_id_hashtbl */ + clp = find_confirmed_client(clid, sessions, nn); + if (clp == NULL) + return nfserr_reclaim_bad; + + return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; } #ifdef CONFIG_NFSD_FAULT_INJECTION -void nfsd_forget_clients(u64 num) +u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { - struct nfs4_client *clp, *next; - int count = 0; - - nfs4_lock_state(); - list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { - nfsd4_remove_clid_dir(clp); - expire_client(clp); - if (++count == num) - break; - } - nfs4_unlock_state(); - - printk(KERN_INFO "NFSD: Forgot %d clients", count); + if (mark_client_expired(clp)) + return 0; + expire_client(clp); + return 1; } -static void release_lockowner_sop(struct nfs4_stateowner *sop) +u64 nfsd_print_client(struct nfs4_client *clp, u64 num) { - release_lockowner(lockowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s\n", buf); + return 1; } -static void release_openowner_sop(struct nfs4_stateowner *sop) +static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, + const char *type) { - release_openowner(openowner(sop)); + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static int nfsd_release_n_owners(u64 num, bool is_open_owner, - void (*release_sop)(struct nfs4_stateowner *)) +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) { - int i, count = 0; - struct nfs4_stateowner *sop, *next; + struct nfs4_openowner *oop; + struct nfs4_lockowner *lop, *lo_next; + struct nfs4_ol_stateid *stp, *st_next; + u64 count = 0; - for (i = 0; i < OWNER_HASH_SIZE; i++) { - list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) { - if (sop->so_is_open_owner != is_open_owner) - continue; - release_sop(sop); - if (++count == num) - return count; + list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { + list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { + if (func) + func(lop); + if (++count == max) + return count; + } } } + return count; } -void nfsd_forget_locks(u64 num) +u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) { - int count; - - nfs4_lock_state(); - count = nfsd_release_n_owners(num, false, release_lockowner_sop); - nfs4_unlock_state(); + return nfsd_foreach_client_lock(clp, max, release_lockowner); +} - printk(KERN_INFO "NFSD: Forgot %d locks", count); +u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_lock(clp, max, NULL); + nfsd_print_count(clp, count, "locked files"); + return count; } -void nfsd_forget_openowners(u64 num) +static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) { - int count; + struct nfs4_openowner *oop, *next; + u64 count = 0; - nfs4_lock_state(); - count = nfsd_release_n_owners(num, true, release_openowner_sop); - nfs4_unlock_state(); + list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { + if (func) + func(oop); + if (++count == max) + break; + } - printk(KERN_INFO "NFSD: Forgot %d open owners", count); + return count; } -int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *)) +u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) { - int i, count = 0; - struct nfs4_file *fp, *fnext; - struct nfs4_delegation *dp, *dnext; + return nfsd_foreach_client_open(clp, max, release_openowner); +} - for (i = 0; i < FILE_HASH_SIZE; i++) { - list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) { - list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) { - deleg_func(dp); - if (++count == num) - return count; - } - } - } +u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_open(clp, max, NULL); + nfsd_print_count(clp, count, "open files"); + return count; +} +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) +{ + struct nfs4_delegation *dp, *next; + u64 count = 0; + + lockdep_assert_held(&state_lock); + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) + list_move(&dp->dl_recall_lru, victims); + if (++count == max) + break; + } return count; } -void nfsd_forget_delegations(u64 num) +u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) { - unsigned int count; + struct nfs4_delegation *dp, *next; + LIST_HEAD(victims); + u64 count; - nfs4_lock_state(); - count = nfsd_process_n_delegations(num, unhash_delegation); - nfs4_unlock_state(); + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, &victims); + spin_unlock(&state_lock); + + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + revoke_delegation(dp); - printk(KERN_INFO "NFSD: Forgot %d delegations", count); + return count; } -void nfsd_recall_delegations(u64 num) +u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) { - unsigned int count; + struct nfs4_delegation *dp, *next; + LIST_HEAD(victims); + u64 count; - nfs4_lock_state(); - spin_lock(&recall_lock); - count = nfsd_process_n_delegations(num, nfsd_break_one_deleg); - spin_unlock(&recall_lock); - nfs4_unlock_state(); + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, &victims); + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + nfsd_break_one_deleg(dp); + spin_unlock(&state_lock); - printk(KERN_INFO "NFSD: Recalled %d delegations", count); + return count; } -#endif /* CONFIG_NFSD_FAULT_INJECTION */ +u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +{ + u64 count = 0; -/* initialization to perform at module load time: */ + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, NULL); + spin_unlock(&state_lock); -void -nfs4_state_init(void) + nfsd_print_count(clp, count, "delegations"); + return count; +} + +u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) { - int i; + struct nfs4_client *clp, *next; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - INIT_LIST_HEAD(&conf_id_hashtbl[i]); - INIT_LIST_HEAD(&conf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_id_hashtbl[i]); - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); - } - for (i = 0; i < SESSION_HASH_SIZE; i++) - INIT_LIST_HEAD(&sessionid_hashtbl[i]); - for (i = 0; i < FILE_HASH_SIZE; i++) { - INIT_LIST_HEAD(&file_hashtbl[i]); - } - for (i = 0; i < OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&ownerstr_hashtbl[i]); + if (!nfsd_netns_ready(nn)) + return 0; + + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += func(clp, max - count); + if ((max != 0) && (count >= max)) + break; } - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]); - INIT_LIST_HEAD(&close_lru); - INIT_LIST_HEAD(&client_lru); - INIT_LIST_HEAD(&del_recall_lru); - reclaim_str_hashtbl_size = 0; + + return count; } -static void -nfsd4_load_reboot_recovery_data(void) +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) { - int status; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); - nfs4_lock_state(); - nfsd4_init_recdir(); - status = nfsd4_recdir_load(); - nfs4_unlock_state(); - if (status) - printk("NFSD: Failure reading reboot recovery data\n"); + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; } +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -4611,83 +5100,275 @@ set_max_delegations(void) max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } +static int nfs4_state_create_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->conf_id_hashtbl) + goto err; + nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->unconf_id_hashtbl) + goto err_unconf_id; + nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!nn->ownerstr_hashtbl) + goto err_ownerstr; + nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * + LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); + if (!nn->lockowner_ino_hashtbl) + goto err_lockowner_ino; + nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * + SESSION_HASH_SIZE, GFP_KERNEL); + if (!nn->sessionid_hashtbl) + goto err_sessionid; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); + INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); + nn->conf_name_tree = RB_ROOT; + nn->unconf_name_tree = RB_ROOT; + INIT_LIST_HEAD(&nn->client_lru); + INIT_LIST_HEAD(&nn->close_lru); + INIT_LIST_HEAD(&nn->del_recall_lru); + spin_lock_init(&nn->client_lock); + + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + get_net(net); + + return 0; + +err_sessionid: + kfree(nn->lockowner_ino_hashtbl); +err_lockowner_ino: + kfree(nn->ownerstr_hashtbl); +err_ownerstr: + kfree(nn->unconf_id_hashtbl); +err_unconf_id: + kfree(nn->conf_id_hashtbl); +err: + return -ENOMEM; +} + +static void +nfs4_state_destroy_net(struct net *net) +{ + int i; + struct nfs4_client *clp = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->conf_id_hashtbl[i])) { + clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->unconf_id_hashtbl[i])) { + clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + kfree(nn->sessionid_hashtbl); + kfree(nn->lockowner_ino_hashtbl); + kfree(nn->ownerstr_hashtbl); + kfree(nn->unconf_id_hashtbl); + kfree(nn->conf_id_hashtbl); + put_net(net); +} + +int +nfs4_state_start_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + ret = nfs4_state_create_net(net); + if (ret) + return ret; + nfsd4_client_tracking_init(net); + nn->boot_time = get_seconds(); + locks_start_grace(net, &nn->nfsd4_manager); + nn->grace_ended = false; + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", + nn->nfsd4_grace, net); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); + return 0; +} + /* initialization to perform when the nfsd service is started: */ -static int -__nfs4_state_start(void) +int +nfs4_state_start(void) { int ret; - boot_time = get_seconds(); - locks_start_grace(&nfsd4_manager); - printk(KERN_INFO "NFSD: starting %ld-second grace period\n", - nfsd4_grace); ret = set_callback_cred(); if (ret) return -ENOMEM; laundry_wq = create_singlethread_workqueue("nfsd4"); - if (laundry_wq == NULL) - return -ENOMEM; + if (laundry_wq == NULL) { + ret = -ENOMEM; + goto out_recovery; + } ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; - queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ); + set_max_delegations(); + return 0; + out_free_laundry: destroy_workqueue(laundry_wq); +out_recovery: return ret; } -int -nfs4_state_start(void) -{ - nfsd4_load_reboot_recovery_data(); - return __nfs4_state_start(); -} - -static void -__nfs4_state_shutdown(void) +void +nfs4_state_shutdown_net(struct net *net) { - int i; - struct nfs4_client *clp = NULL; struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&conf_id_hashtbl[i])) { - clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); - expire_client(clp); - } - while (!list_empty(&unconf_str_hashtbl[i])) { - clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); - expire_client(clp); - } - } + cancel_delayed_work_sync(&nn->laundromat_work); + locks_end_grace(&nn->nfsd4_manager); + + nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); } - nfsd4_shutdown_recdir(); + nfsd4_client_tracking_exit(net); + nfs4_state_destroy_net(net); + nfs4_unlock_state(); } void nfs4_state_shutdown(void) { - cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); - locks_end_grace(&nfsd4_manager); - nfs4_lock_state(); - nfs4_release_reclaim(); - __nfs4_state_shutdown(); - nfs4_unlock_state(); nfsd4_destroy_callback_queue(); } + +static void +get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) +{ + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); +} + +static void +put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) +{ + if (cstate->minorversion) { + memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } +} + +void +clear_current_stateid(struct nfsd4_compound_state *cstate) +{ + CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); +} + +/* + * functions to set current state id + */ +void +nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) +{ + put_stateid(cstate, &odp->od_stateid); +} + +void +nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + put_stateid(cstate, &open->op_stateid); +} + +void +nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + put_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock) +{ + put_stateid(cstate, &lock->lk_resp_stateid); +} + +/* + * functions to consume current state id + */ + +void +nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) +{ + get_stateid(cstate, &odp->od_stateid); +} + +void +nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp) +{ + get_stateid(cstate, &drp->dr_stateid); +} + +void +nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp) +{ + get_stateid(cstate, &fsp->fr_stateid); +} + +void +nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) +{ + get_stateid(cstate, &setattr->sa_stateid); +} + +void +nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + get_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) +{ + get_stateid(cstate, &locku->lu_stateid); +} + +void +nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read) +{ + get_stateid(cstate, &read->rd_stateid); +} + +void +nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write) +{ + get_stateid(cstate, &write->wr_stateid); +} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0ec5a1b9700..944275c8f56 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -53,6 +53,12 @@ #include "vfs.h" #include "state.h" #include "cache.h" +#include "netns.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> +#endif + #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -65,17 +71,17 @@ #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL static __be32 -check_filename(char *str, int len, __be32 err) +check_filename(char *str, int len) { int i; if (len == 0) return nfserr_inval; if (isdotent(str, len)) - return err; + return nfserr_badname; for (i = 0; i < len; i++) if (str[i] == '/') - return err; + return nfserr_badname; return 0; } @@ -92,16 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ32(x) (x) = ntohl(*p++) -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) -#define READTIME(x) do { \ - p++; \ - (x) = ntohl(*p++); \ - p++; \ -} while (0) #define READMEM(x,nbytes) do { \ x = (char *)p; \ p += XDR_QUADLEN(nbytes); \ @@ -133,20 +129,17 @@ xdr_error: \ } \ } while (0) -static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +static void next_decode_page(struct nfsd4_compoundargs *argp) { - savep->p = argp->p; - savep->end = argp->end; - savep->pagelen = argp->pagelen; - savep->pagelist = argp->pagelist; -} - -static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) -{ - argp->p = savep->p; - argp->end = savep->end; - argp->pagelen = savep->pagelen; - argp->pagelist = savep->pagelist; + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } } static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) @@ -176,16 +169,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. */ memcpy(p, argp->p, avail); - /* step to next page */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; @@ -196,6 +180,15 @@ static int zero_clientid(clientid_t *clid) return (clid->cl_boot == 0) && (clid->cl_id == 0); } +/** + * defer_free - mark an allocation as deferred freed + * @argp: NFSv4 compound argument structure to be freed with + * @release: release callback to free @p, typically kfree() + * @p: pointer to be freed + * + * Marks @p to be freed when processing the compound operation + * described in @argp finishes. + */ static int defer_free(struct nfsd4_compoundargs *argp, void (*release)(const void *), void *p) @@ -212,6 +205,16 @@ defer_free(struct nfsd4_compoundargs *argp, return 0; } +/** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with + * @p: pointer to be duplicated + * @nbytes: length to be duplicated + * + * Returns a pointer to a copy of @nbytes bytes of memory at @p + * that are preserved until processing of the NFSv4 compound + * operation described by @argp finishes. + */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { @@ -240,29 +243,30 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[2] = 0; READ_BUF(4); - READ32(bmlen); + bmlen = be32_to_cpup(p++); if (bmlen > 1000) goto xdr_error; READ_BUF(bmlen << 2); if (bmlen > 0) - READ32(bmval[0]); + bmval[0] = be32_to_cpup(p++); if (bmlen > 1) - READ32(bmval[1]); + bmval[1] = be32_to_cpup(p++); if (bmlen > 2) - READ32(bmval[2]); + bmval[2] = be32_to_cpup(p++); DECODE_TAIL; } static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl) + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; + u64 sec; char *buf; - int host_err; DECODE_HEAD; iattr->ia_valid = 0; @@ -270,51 +274,50 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return status; READ_BUF(4); - READ32(expected_len); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); len += 8; - READ64(iattr->ia_size); + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; - READ32(nace); + nace = be32_to_cpup(p++); if (nace > NFS4_ACL_MAX) - return nfserr_resource; + return nfserr_fbig; *acl = nfs4_acl_new(nace); - if (*acl == NULL) { - host_err = -ENOMEM; - goto out_nfserr; - } + if (*acl == NULL) + return nfserr_jukebox; + defer_free(argp, kfree, *acl); (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace->type); - READ32(ace->flag); - READ32(ace->access_mask); - READ32(dummy32); + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); ace->whotype = nfs4_acl_get_whotype(buf, dummy32); status = nfs_ok; if (ace->whotype != NFS4_ACL_WHO_NAMED) - ace->who = 0; + ; else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) status = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace->who); + buf, dummy32, &ace->who_gid); else status = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace->who); + buf, dummy32, &ace->who_uid); if (status) return status; } @@ -323,14 +326,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; - READ32(iattr->ia_mode); + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -341,7 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); @@ -352,18 +355,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); - READ32(iattr->ia_atime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_atime.tv_sec = (time_t)sec; + iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -378,18 +379,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); - READ32(iattr->ia_mtime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_mtime.tv_sec = sec; + iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -401,6 +400,33 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; } } + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + label->len = dummy32; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) @@ -409,10 +435,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; DECODE_TAIL; - -out_nfserr: - status = nfserrno(host_err); - goto out; } static __be32 @@ -421,7 +443,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(sid->si_generation); + sid->si_generation = be32_to_cpup(p++); COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -433,7 +455,98 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_HEAD; READ_BUF(4); - READ32(access->ac_req_access); + access->ac_req_access = be32_to_cpup(p++); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + DECODE_HEAD; + u32 dummy, uid, gid; + char *machine_name; + int i; + int nr_secflavs; + + /* callback_sec_params4 */ + READ_BUF(4); + nr_secflavs = be32_to_cpup(p++); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + if (cbs->flavor == (u32)(-1)) + cbs->flavor = RPC_AUTH_NULL; + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + dummy = be32_to_cpup(p++); + + /* machine name */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); + + /* more gids */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + if (cbs->flavor == (u32)(-1)) { + kuid_t kuid = make_kuid(&init_user_ns, uid); + kgid_t kgid = make_kgid(&init_user_ns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid" + "uid or gid ignoring!\n"); + } + } + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + dummy = be32_to_cpup(p++); + /* gcbp_handle_from_server */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; +} + +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + DECODE_HEAD; + + READ_BUF(4); + bc->bc_cb_program = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); DECODE_TAIL; } @@ -444,7 +557,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(bcts->dir); + bcts->dir = be32_to_cpup(p++); /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker * could help us figure out we should be using it. */ DECODE_TAIL; @@ -456,7 +569,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) DECODE_HEAD; READ_BUF(4); - READ32(close->cl_seqid); + close->cl_seqid = be32_to_cpup(p++); return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; @@ -469,8 +582,8 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_HEAD; READ_BUF(12); - READ64(commit->co_offset); - READ32(commit->co_count); + p = xdr_decode_hyper(p, &commit->co_offset); + commit->co_count = be32_to_cpup(p++); DECODE_TAIL; } @@ -481,19 +594,30 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_HEAD; READ_BUF(4); - READ32(create->cr_type); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: READ_BUF(4); - READ32(create->cr_linklen); + create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: READ_BUF(8); - READ32(create->cr_specdata1); - READ32(create->cr_specdata2); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -503,14 +627,14 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } READ_BUF(4); - READ32(create->cr_namelen); + create->cr_namelen = be32_to_cpup(p++); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl); + &create->cr_acl, &create->cr_label); if (status) goto out; @@ -535,10 +659,10 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_HEAD; READ_BUF(4); - READ32(link->li_namelen); + link->li_namelen = be32_to_cpup(p++); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + if ((status = check_filename(link->li_name, link->li_namelen))) return status; DECODE_TAIL; @@ -553,24 +677,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ READ_BUF(28); - READ32(lock->lk_type); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(lock->lk_reclaim); - READ64(lock->lk_offset); - READ64(lock->lk_length); - READ32(lock->lk_is_new); + lock->lk_reclaim = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); + lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { READ_BUF(4); - READ32(lock->lk_new_open_seqid); + lock->lk_new_open_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); if (status) return status; READ_BUF(8 + sizeof(clientid_t)); - READ32(lock->lk_new_lock_seqid); + lock->lk_new_lock_seqid = be32_to_cpup(p++); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - READ32(lock->lk_new_owner.len); + lock->lk_new_owner.len = be32_to_cpup(p++); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { @@ -578,7 +702,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) if (status) return status; READ_BUF(4); - READ32(lock->lk_old_lock_seqid); + lock->lk_old_lock_seqid = be32_to_cpup(p++); } DECODE_TAIL; @@ -590,13 +714,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_HEAD; READ_BUF(32); - READ32(lockt->lt_type); + lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; - READ64(lockt->lt_offset); - READ64(lockt->lt_length); + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); - READ32(lockt->lt_owner.len); + lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -609,16 +733,16 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) DECODE_HEAD; READ_BUF(8); - READ32(locku->lu_type); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(locku->lu_seqid); + locku->lu_seqid = be32_to_cpup(p++); status = nfsd4_decode_stateid(argp, &locku->lu_stateid); if (status) return status; READ_BUF(16); - READ64(locku->lu_offset); - READ64(locku->lu_length); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); DECODE_TAIL; } @@ -629,23 +753,27 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_HEAD; READ_BUF(4); - READ32(lookup->lo_len); + lookup->lo_len = be32_to_cpup(p++); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + if ((status = check_filename(lookup->lo_name, lookup->lo_len))) return status; DECODE_TAIL; } -static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) +static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { __be32 *p; u32 w; READ_BUF(4); - READ32(w); - *x = w; + w = be32_to_cpup(p++); + *share_access = w & NFS4_SHARE_ACCESS_MASK; + *deleg_want = w & NFS4_SHARE_WANT_MASK; + if (deleg_when) + *deleg_when = w & NFS4_SHARE_WHEN_MASK; + switch (w & NFS4_SHARE_ACCESS_MASK) { case NFS4_SHARE_ACCESS_READ: case NFS4_SHARE_ACCESS_WRITE: @@ -673,6 +801,9 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) w &= ~NFS4_SHARE_WANT_MASK; if (!w) return nfs_ok; + + if (!deleg_when) /* open_downgrade */ + return nfserr_inval; switch (w) { case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: @@ -689,7 +820,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) __be32 *p; READ_BUF(4); - READ32(*x); + *x = be32_to_cpup(p++); /* Note: unlinke access bits, deny bits may be zero. */ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; @@ -703,7 +834,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne __be32 *p; READ_BUF(4); - READ32(o->len); + o->len = be32_to_cpup(p++); if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) return nfserr_bad_xdr; @@ -719,15 +850,19 @@ static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { DECODE_HEAD; + u32 dummy; memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; open->op_openowner = NULL; + open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); - READ32(open->op_seqid); - status = nfsd4_decode_share_access(argp, &open->op_share_access); + open->op_seqid = be32_to_cpup(p++); + /* decode, yet ignore deleg_when until supported */ + status = nfsd4_decode_share_access(argp, &open->op_share_access, + &open->op_deleg_want, &dummy); if (status) goto xdr_error; status = nfsd4_decode_share_deny(argp, &open->op_share_deny); @@ -739,32 +874,32 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) if (status) goto xdr_error; READ_BUF(4); - READ32(open->op_create); + open->op_create = be32_to_cpup(p++); switch (open->op_create) { case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: READ_BUF(4); - READ32(open->op_createmode); + open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; case NFS4_CREATE_EXCLUSIVE: - READ_BUF(8); - COPYMEM(open->op_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); break; case NFS4_CREATE_EXCLUSIVE4_1: if (argp->minorversion < 1) goto xdr_error; - READ_BUF(8); - COPYMEM(open->op_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -778,30 +913,30 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* open_claim */ READ_BUF(4); - READ32(open->op_claim_type); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: READ_BUF(4); - READ32(open->op_delegate_type); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_FH: @@ -828,13 +963,16 @@ static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; - + + if (argp->minorversion >= 1) + return nfserr_notsupp; + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; READ_BUF(4); - READ32(open_conf->oc_seqid); - + open_conf->oc_seqid = be32_to_cpup(p++); + DECODE_TAIL; } @@ -847,8 +985,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d if (status) return status; READ_BUF(4); - READ32(open_down->od_seqid); - status = nfsd4_decode_share_access(argp, &open_down->od_share_access); + open_down->od_seqid = be32_to_cpup(p++); + status = nfsd4_decode_share_access(argp, &open_down->od_share_access, + &open_down->od_deleg_want, NULL); if (status) return status; status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); @@ -863,7 +1002,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_HEAD; READ_BUF(4); - READ32(putfh->pf_fhlen); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) goto xdr_error; READ_BUF(putfh->pf_fhlen); @@ -873,6 +1012,14 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +{ + if (argp->minorversion == 0) + return nfs_ok; + return nfserr_notsupp; +} + +static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; @@ -881,8 +1028,8 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) if (status) return status; READ_BUF(12); - READ64(read->rd_offset); - READ32(read->rd_length); + p = xdr_decode_hyper(p, &read->rd_offset); + read->rd_length = be32_to_cpup(p++); DECODE_TAIL; } @@ -893,10 +1040,10 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_HEAD; READ_BUF(24); - READ64(readdir->rd_cookie); + p = xdr_decode_hyper(p, &readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ - READ32(readdir->rd_maxcount); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -909,10 +1056,10 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_HEAD; READ_BUF(4); - READ32(remove->rm_namelen); + remove->rm_namelen = be32_to_cpup(p++); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + if ((status = check_filename(remove->rm_name, remove->rm_namelen))) return status; DECODE_TAIL; @@ -924,15 +1071,15 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_HEAD; READ_BUF(4); - READ32(rename->rn_snamelen); + rename->rn_snamelen = be32_to_cpup(p++); READ_BUF(rename->rn_snamelen + 4); SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ32(rename->rn_tnamelen); + rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) return status; DECODE_TAIL; @@ -943,6 +1090,9 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(sizeof(clientid_t)); COPYMEM(clientid, sizeof(clientid_t)); @@ -956,11 +1106,10 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(secinfo->si_namelen); + secinfo->si_namelen = be32_to_cpup(p++); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen, - nfserr_noent); + status = check_filename(secinfo->si_name, secinfo->si_namelen); if (status) return status; DECODE_TAIL; @@ -973,7 +1122,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(sin->sin_style); + sin->sin_style = be32_to_cpup(p++); DECODE_TAIL; } @@ -986,7 +1135,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl); + &setattr->sa_acl, &setattr->sa_label); } static __be32 @@ -994,23 +1143,26 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; - READ_BUF(8); - COPYMEM(setclientid->se_verf.data, 8); + if (argp->minorversion >= 1) + return nfserr_notsupp; + + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_opaque(argp, &setclientid->se_name); if (status) return nfserr_bad_xdr; READ_BUF(8); - READ32(setclientid->se_callback_prog); - READ32(setclientid->se_callback_netid_len); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_netid_len + 4); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ32(setclientid->se_callback_addr_len); + setclientid->se_callback_addr_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_addr_len + 4); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ32(setclientid->se_callback_ident); + setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; } @@ -1020,9 +1172,12 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; - READ_BUF(8 + sizeof(nfs4_verifier)); + if (argp->minorversion >= 1) + return nfserr_notsupp; + + READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); - COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier)); + COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); DECODE_TAIL; } @@ -1031,33 +1186,16 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { -#if 0 - struct nfsd4_compoundargs save = { - .p = argp->p, - .end = argp->end, - .rqstp = argp->rqstp, - }; - u32 ve_bmval[2]; - struct iattr ve_iattr; /* request */ - struct nfs4_acl *ve_acl; /* request */ -#endif DECODE_HEAD; if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) goto out; /* For convenience's sake, we compare raw xdr'd attributes in - * nfsd4_proc_verify; however we still decode here just to return - * correct error in case of bad xdr. */ -#if 0 - status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); - if (status == nfserr_inval) { - status = nfserrno(status); - goto out; - } -#endif + * nfsd4_proc_verify */ + READ_BUF(4); - READ32(verify->ve_attrlen); + verify->ve_attrlen = be32_to_cpup(p++); READ_BUF(verify->ve_attrlen); SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -1068,7 +1206,6 @@ static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { int avail; - int v; int len; DECODE_HEAD; @@ -1076,11 +1213,11 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) if (status) return status; READ_BUF(16); - READ64(write->wr_offset); - READ32(write->wr_stable_how); + p = xdr_decode_hyper(p, &write->wr_offset); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; - READ32(write->wr_buflen); + write->wr_buflen = be32_to_cpup(p++); /* Sorry .. no magic macros for this.. * * READ_BUF(write->wr_buflen); @@ -1092,27 +1229,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) __FILE__, __LINE__); goto xdr_error; } - argp->rqstp->rq_vec[0].iov_base = p; - argp->rqstp->rq_vec[0].iov_len = avail; - v = 0; - len = write->wr_buflen; - while (len > argp->rqstp->rq_vec[v].iov_len) { - len -= argp->rqstp->rq_vec[v].iov_len; - v++; - argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); + write->wr_head.iov_base = p; + write->wr_head.iov_len = avail; + write->wr_pagelist = argp->pagelist; + + len = XDR_QUADLEN(write->wr_buflen) << 2; + if (len >= avail) { + int pages; + + len -= avail; + + pages = len >> PAGE_SHIFT; + argp->pagelist += pages; + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + + argp->p = (__be32 *)page_address(argp->pagelist[0]); argp->pagelist++; - if (argp->pagelen >= PAGE_SIZE) { - argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; - argp->pagelen -= PAGE_SIZE; - } else { - argp->rqstp->rq_vec[v].iov_len = argp->pagelen; - argp->pagelen -= len; - } + argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } - argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); - argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); - argp->rqstp->rq_vec[v].iov_len = len; - write->wr_vlen = v+1; + argp->p += XDR_QUADLEN(len); DECODE_TAIL; } @@ -1122,9 +1258,12 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - READ32(rlockowner->rl_owner.len); + rlockowner->rl_owner.len = be32_to_cpup(p++); READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); @@ -1148,63 +1287,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; READ_BUF(4); - READ32(exid->flags); + exid->flags = be32_to_cpup(p++); /* Ignore state_protect4_a */ READ_BUF(4); - READ32(exid->spa_how); + exid->spa_how = be32_to_cpup(p++); switch (exid->spa_how) { case SP4_NONE: break; case SP4_MACH_CRED: /* spo_must_enforce */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* spo_must_allow */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; break; case SP4_SSV: /* ssp_ops */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy * 4); p += dummy; /* ssp_hash_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_encr_algs<> */ READ_BUF(4); - READ32(tmp); + tmp = be32_to_cpup(p++); while (tmp--) { READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); } /* ssp_window and ssp_num_gss_handles */ READ_BUF(8); - READ32(dummy); - READ32(dummy); + dummy = be32_to_cpup(p++); + dummy = be32_to_cpup(p++); break; default: goto xdr_error; @@ -1212,7 +1351,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ - READ32(dummy); + dummy = be32_to_cpup(p++); if (dummy > 1) goto xdr_error; @@ -1220,13 +1359,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, if (dummy == 1) { /* nii_domain */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); /* nii_name */ READ_BUF(4); - READ32(dummy); + dummy = be32_to_cpup(p++); READ_BUF(dummy); p += XDR_QUADLEN(dummy); @@ -1242,29 +1381,25 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, struct nfsd4_create_session *sess) { DECODE_HEAD; - u32 dummy; - char *machine_name; - int i; - int nr_secflavs; READ_BUF(16); COPYMEM(&sess->clientid, 8); - READ32(sess->seqid); - READ32(sess->flags); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); /* Fore channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->fore_channel.maxreq_sz); - READ32(sess->fore_channel.maxresp_sz); - READ32(sess->fore_channel.maxresp_cached); - READ32(sess->fore_channel.maxops); - READ32(sess->fore_channel.maxreqs); - READ32(sess->fore_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->fore_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->fore_channel.rdma_attrs); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->fore_channel.nr_rdma_attrs > 1) { dprintk("Too many fore channel attr bitmaps!\n"); goto xdr_error; @@ -1272,73 +1407,24 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, /* Back channel attrs */ READ_BUF(28); - READ32(dummy); /* headerpadsz is always 0 */ - READ32(sess->back_channel.maxreq_sz); - READ32(sess->back_channel.maxresp_sz); - READ32(sess->back_channel.maxresp_cached); - READ32(sess->back_channel.maxops); - READ32(sess->back_channel.maxreqs); - READ32(sess->back_channel.nr_rdma_attrs); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); + sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); if (sess->back_channel.nr_rdma_attrs == 1) { READ_BUF(4); - READ32(sess->back_channel.rdma_attrs); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); } else if (sess->back_channel.nr_rdma_attrs > 1) { dprintk("Too many back channel attr bitmaps!\n"); goto xdr_error; } - READ_BUF(8); - READ32(sess->callback_prog); - - /* callback_sec_params4 */ - READ32(nr_secflavs); - for (i = 0; i < nr_secflavs; ++i) { - READ_BUF(4); - READ32(dummy); - switch (dummy) { - case RPC_AUTH_NULL: - /* Nothing to read */ - break; - case RPC_AUTH_UNIX: - READ_BUF(8); - /* stamp */ - READ32(dummy); - - /* machine name */ - READ32(dummy); - READ_BUF(dummy); - SAVEMEM(machine_name, dummy); - - /* uid, gid */ - READ_BUF(8); - READ32(sess->uid); - READ32(sess->gid); - - /* more gids */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy * 4); - break; - case RPC_AUTH_GSS: - dprintk("RPC_AUTH_GSS callback secflavor " - "not supported!\n"); - READ_BUF(8); - /* gcbp_service */ - READ32(dummy); - /* gcbp_handle_from_server */ - READ32(dummy); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - /* gcbp_handle_from_client */ - READ_BUF(4); - READ32(dummy); - READ_BUF(dummy); - break; - default: - dprintk("Illegal callback secflavor\n"); - return nfserr_inval; - } - } + READ_BUF(4); + sess->callback_prog = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &sess->cb_sec); DECODE_TAIL; } @@ -1360,7 +1446,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(sizeof(stateid_t)); - READ32(free_stateid->fr_stateid.si_generation); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); DECODE_TAIL; @@ -1374,10 +1460,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - READ32(seq->seqid); - READ32(seq->slotid); - READ32(seq->maxslots); - READ32(seq->cachethis); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); DECODE_TAIL; } @@ -1385,26 +1471,29 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) { - unsigned int nbytes; - stateid_t si; int i; - __be32 *p; - __be32 status; + __be32 *p, status; + struct nfsd4_test_stateid_id *stateid; READ_BUF(4); test_stateid->ts_num_ids = ntohl(*p++); - nbytes = test_stateid->ts_num_ids * sizeof(stateid_t); - if (nbytes > (u32)((char *)argp->end - (char *)argp->p)) - goto xdr_error; - - test_stateid->ts_saved_args = argp; - save_buf(argp, &test_stateid->ts_savedp); + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); for (i = 0; i < test_stateid->ts_num_ids; i++) { - status = nfsd4_decode_stateid(argp, &si); + stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + if (!stateid) { + status = nfserrno(-ENOMEM); + goto out; + } + + defer_free(argp, kfree, stateid); + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + + status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); if (status) - return status; + goto out; } status = 0; @@ -1431,7 +1520,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str DECODE_HEAD; READ_BUF(4); - READ32(rc->rca_one_fs); + rc->rca_one_fs = be32_to_cpup(p++); DECODE_TAIL; } @@ -1471,7 +1560,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_READ] = (nfsd4_dec)nfsd4_decode_read, [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, @@ -1488,49 +1577,9 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, -}; - -static nfsd4_dec nfsd41_dec_ops[] = { - [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] = (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, - [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, @@ -1551,37 +1600,39 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; -struct nfsd4_minorversion_ops { - nfsd4_dec *decoders; - int nops; -}; - -static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { - [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, - [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, -}; +static inline bool +nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) +{ + if (op->opnum < FIRST_NFS4_OP) + return false; + else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) + return false; + else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) + return false; + else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) + return false; + return true; +} static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; - struct nfsd4_minorversion_ops *ops; bool cachethis = false; + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ + int readcount = 0; + int readbytes = 0; int i; - /* - * XXX: According to spec, we should check the tag - * for UTF-8 compliance. I'm postponing this for - * now because it seems that some clients do use - * binary tags. - */ READ_BUF(4); - READ32(argp->taglen); + argp->taglen = be32_to_cpup(p++); READ_BUF(argp->taglen + 8); SAVEMEM(argp->tag, argp->taglen); - READ32(argp->minorversion); - READ32(argp->opcnt); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; @@ -1597,221 +1648,170 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } - if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) + if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION) argp->opcnt = 0; - ops = &nfsd4_minorversion[argp->minorversion]; for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; - /* - * We can't use READ_BUF() here because we need to handle - * a missing opcode as an OP_WRITE + 1. So we need to check - * to see if we're truly at the end of our buffer or if there - * is another page we need to flip to. - */ - - if (argp->p == argp->end) { - if (argp->pagelen < 4) { - /* There isn't an opcode still on the wire */ - op->opnum = OP_WRITE + 1; - op->status = nfserr_bad_xdr; - argp->opcnt = i+1; - break; - } - - /* - * False alarm. We just hit a page boundary, but there - * is still data available. Move pointer across page - * boundary. *snip from READ_BUF* - */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } - } - op->opnum = ntohl(*argp->p++); + READ_BUF(4); + op->opnum = be32_to_cpup(p++); - if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) - op->status = ops->decoders[op->opnum](argp, &op->u); + if (nfsd4_opnum_in_range(argp, op)) + op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - - if (op->status) { - argp->opcnt = i+1; - break; - } /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: */ cachethis |= nfsd4_cache_this_op(op); + + if (op->opnum == OP_READ) { + readcount++; + readbytes += nfsd4_max_reply(argp->rqstp, op); + } else + max_reply += nfsd4_max_reply(argp->rqstp, op); + + if (op->status) { + argp->opcnt = i+1; + break; + } } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; + svc_reserve(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; - DECODE_TAIL; -} - -#define WRITE32(n) *p++ = htonl(n) -#define WRITE64(n) do { \ - *p++ = htonl((u32)((n) >> 32)); \ - *p++ = htonl((u32)(n)); \ -} while (0) -#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \ - *(p + XDR_QUADLEN(nbytes) -1) = 0; \ - memcpy(p, ptr, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -}} while (0) - -static void write32(__be32 **p, u32 n) -{ - *(*p)++ = n; -} + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) + argp->rqstp->rq_splice_ok = false; -static void write64(__be32 **p, u64 n) -{ - write32(p, (u32)(n >> 32)); - write32(p, (u32)n); + DECODE_TAIL; } -static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) { if (IS_I_VERSION(inode)) { - write64(p, inode->i_version); + p = xdr_encode_hyper(p, inode->i_version); } else { - write32(p, stat->ctime.tv_sec); - write32(p, stat->ctime.tv_nsec); + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); } + return p; } -static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { - write32(p, c->atomic); + *p++ = cpu_to_be32(c->atomic); if (c->change_supported) { - write64(p, c->before_change); - write64(p, c->after_change); + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); } else { - write32(p, c->before_ctime_sec); - write32(p, c->before_ctime_nsec); - write32(p, c->after_ctime_sec); - write32(p, c->after_ctime_nsec); - } -} - -#define RESERVE_SPACE(nbytes) do { \ - p = resp->p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ -} while (0) -#define ADJUST_ARGS() resp->p = p - -/* - * Header routine to setup seqid operation replay cache - */ -#define ENCODE_SEQID_OP_HEAD \ - __be32 *save; \ - \ - save = resp->p; - -/* - * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This - * is where sequence id's are incremented, and the replay cache is filled. - * Note that we increment sequence id's here, at the last moment, so we're sure - * we know whether the error to be returned is a sequence id mutating error. - */ - -static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) -{ - struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; - - if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { - stateowner->so_seqid++; - stateowner->so_replay.rp_status = nfserr; - stateowner->so_replay.rp_buflen = - (char *)resp->p - (char *)save; - memcpy(stateowner->so_replay.rp_buf, save, - stateowner->so_replay.rp_buflen); - nfsd4_purge_closed_stateid(stateowner); + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); } + return p; } /* Encode as an array of strings the string given with components - * separated @sep. + * separated @sep, escaped with esc_enter and esc_exit. */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, + char *components, char esc_enter, + char esc_exit) { - __be32 *p = *pp; - __be32 *countp = p; + __be32 *p; + __be32 pathlen; + int pathlen_offset; int strlen, count=0; - char *str, *end; + char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); - if ((*buflen -= 4) < 0) + + pathlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(0); /* We will fill this in with @count later */ + p++; /* We will fill this in with @count later */ + end = str = components; while (*end) { - for (; *end && (*end != sep); end++) - ; /* Point to end of component */ + bool found_esc = false; + + /* try to parse as esc_start, ..., esc_end, sep */ + if (*str == esc_enter) { + for (; *end && (*end != esc_exit); end++) + /* find esc_exit or end of string */; + next = end + 1; + if (*end && (!*next || *next == sep)) { + str++; + found_esc = true; + } + } + + if (!found_esc) + for (; *end && (*end != sep); end++) + /* find sep or end of string */; + strlen = end - str; if (strlen) { - if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + p = xdr_reserve_space(xdr, strlen + 4); + if (!p) return nfserr_resource; - WRITE32(strlen); - WRITEMEM(str, strlen); + p = xdr_encode_opaque(p, str, strlen); count++; } else end++; str = end; } - *pp = p; - p = countp; - WRITE32(count); + pathlen = htonl(xdr->buf->len - pathlen_offset); + write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4); return 0; } +/* Encode as an array of strings the string given with components + * separated @sep. + */ +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, + char *components) +{ + return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); +} + /* * encode a location element of a fs_locations structure */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, + struct nfsd4_fs_location *location) { __be32 status; - __be32 *p = *pp; - status = nfsd4_encode_components(':', location->hosts, &p, buflen); + status = nfsd4_encode_components_esc(xdr, ':', location->hosts, + '[', ']'); if (status) return status; - status = nfsd4_encode_components('/', location->path, &p, buflen); + status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - *pp = p; return 0; } /* * Encode a path in RFC3530 'pathname4' format */ -static __be32 nfsd4_encode_path(const struct path *root, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { - struct path cur = { - .mnt = path->mnt, - .dentry = path->dentry, - }; - __be32 *p = *pp; + struct path cur = *path; + __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -1842,27 +1842,30 @@ static __be32 nfsd4_encode_path(const struct path *root, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } - - *buflen -= 4; - if (*buflen < 0) + err = nfserr_resource; + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_free; - WRITE32(ncomponents); + *p++ = cpu_to_be32(ncomponents); while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; - unsigned int len = dentry->d_name.len; + unsigned int len; - *buflen -= 4 + (XDR_QUADLEN(len) << 2); - if (*buflen < 0) + spin_lock(&dentry->d_lock); + len = dentry->d_name.len; + p = xdr_reserve_space(xdr, len + 4); + if (!p) { + spin_unlock(&dentry->d_lock); goto out_free; - WRITE32(len); - WRITEMEM(dentry->d_name.name, len); + } + p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%s", dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); ncomponents--; } - *pp = p; err = 0; out_free: dprintk(")\n"); @@ -1873,8 +1876,8 @@ out_free: return err; } -static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, - const struct path *path, __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, + struct svc_rqst *rqstp, const struct path *path) { struct svc_export *exp_ps; __be32 res; @@ -1882,7 +1885,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, exp_ps = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp_ps)) return nfserrno(PTR_ERR(exp_ps)); - res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); exp_put(exp_ps); return res; } @@ -1890,28 +1893,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, /* * encode a fs_locations structure */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, - struct svc_export *exp, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, + struct svc_rqst *rqstp, struct svc_export *exp) { __be32 status; int i; - __be32 *p = *pp; + __be32 *p; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); + status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); if (status) return status; - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(fslocs->locations_count); + *p++ = cpu_to_be32(fslocs->locations_count); for (i=0; i<fslocs->locations_count; i++) { - status = nfsd4_encode_fs_location4(&fslocs->locations[i], - &p, buflen); + status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) return status; } - *pp = p; return 0; } @@ -1929,50 +1930,48 @@ static u32 nfs4_file_type(umode_t mode) }; } -static __be32 -nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) +static inline __be32 +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { - int status; - - if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) - return nfserr_resource; - if (whotype != NFS4_ACL_WHO_NAMED) - status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); - else if (group) - status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); + if (ace->whotype != NFS4_ACL_WHO_NAMED) + return nfs4_acl_write_who(xdr, ace->whotype); + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + return nfsd4_encode_group(xdr, rqstp, ace->who_gid); else - status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); - if (status < 0) - return nfserrno(status); - *p = xdr_encode_opaque(*p, NULL, status); - *buflen -= (XDR_QUADLEN(status) << 2) + 4; - BUG_ON(*buflen < 0); - return 0; + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } -static inline __be32 -nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) -{ - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); -} +#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ + FATTR4_WORD0_RDATTR_ERROR) +#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 -nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); -} + __be32 *p; -static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) -{ - return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); -} + p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + if (!p) + return nfserr_resource; -#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ - FATTR4_WORD0_RDATTR_ERROR) -#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + *p++ = cpu_to_be32(0); /* lfs */ + *p++ = cpu_to_be32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) +{ return 0; } +#endif static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { @@ -1990,40 +1989,59 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) return 0; } + +static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +{ + struct path path = exp->ex_path; + int err; + + path_get(&path); + while (follow_up(&path)) { + if (path.dentry != path.mnt->mnt_root) + break; + } + err = vfs_getattr(&path, stat); + path_put(&path); + return err; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. - * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. */ -__be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, +static __be32 +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + struct svc_export *exp, + struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; u32 bmval1 = bmval[1]; u32 bmval2 = bmval[2]; struct kstat stat; - struct svc_fh tempfh; + struct svc_fh *tempfh = NULL; struct kstatfs statfs; - int buflen = *countp << 2; - __be32 *attrlenp; + __be32 *p; + int starting_len = xdr->buf->len; + int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; __be32 status; int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { .mnt = exp->ex_path.mnt, .dentry = dentry, }; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); @@ -2037,11 +2055,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out; } - err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); + err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); @@ -2049,11 +2067,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { - fh_init(&tempfh, NFS4_FHSIZE); - status = fh_compose(&tempfh, exp, dentry, NULL); + tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + status = nfserr_jukebox; + if (!tempfh) + goto out; + fh_init(tempfh, NFS4_FHSIZE); + status = fh_compose(tempfh, exp, dentry, NULL); if (status) goto out; - fhp = &tempfh; + fhp = tempfh; } if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_SUPPORTED_ATTRS)) { @@ -2070,26 +2092,49 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; + } + } +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ + if (bmval2) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; - WRITE32(3); - WRITE32(bmval0); - WRITE32(bmval1); - WRITE32(bmval2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); } else if (bmval1) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); } else { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32(1); - WRITE32(bmval0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); } - attrlenp = p++; /* to be backfilled later */ + + attrlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 word0 = nfsd_suppattrs0(minorversion); @@ -2098,354 +2143,426 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(2); - WRITE32(word0); - WRITE32(word1); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); } else { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; - WRITE32(3); - WRITE32(word0); - WRITE32(word1); - WRITE32(word2); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(word2); } } if (bmval0 & FATTR4_WORD0_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; dummy = nfs4_file_type(stat.mode); - if (dummy == NF4BAD) - goto out_serverfault; - WRITE32(dummy); + if (dummy == NF4BAD) { + status = nfserr_serverfault; + goto out; + } + *p++ = cpu_to_be32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_PERSISTENT); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| + NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - write_change(&p, &stat, dentry->d_inode); + p = encode_change(p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.size); + p = xdr_encode_hyper(p, stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_FSID) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { - WRITE64(NFS4_REFERRAL_FSID_MAJOR); - WRITE64(NFS4_REFERRAL_FSID_MINOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); } else switch(fsid_source(fhp)) { case FSIDSOURCE_FSID: - WRITE64((u64)exp->ex_fsid); - WRITE64((u64)0); + p = xdr_encode_hyper(p, (u64)exp->ex_fsid); + p = xdr_encode_hyper(p, (u64)0); break; case FSIDSOURCE_DEV: - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MAJOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - WRITEMEM(exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, + EX_UUID_LEN); break; } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(nfsd4_lease); + *p++ = cpu_to_be32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(rdattr_err); + *p++ = cpu_to_be32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; if (acl == NULL) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); goto out_acl; } - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(acl->naces); + *p++ = cpu_to_be32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - if ((buflen -= 4*3) < 0) - goto out_resource; - WRITE32(ace->type); - WRITE32(ace->flag); - WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(rqstp, ace->whotype, - ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, - &p, &buflen); - if (status == nfserr_resource) + p = xdr_reserve_space(xdr, 4*3); + if (!p) goto out_resource; + *p++ = cpu_to_be32(ace->type); + *p++ = cpu_to_be32(ace->flag); + *p++ = cpu_to_be32(ace->access_mask & + NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; } } out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(aclsupport ? + *p++ = cpu_to_be32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; - if (buflen < 0) + p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); + if (!p) goto out_resource; - WRITE32(fhp->fh_handle.fh_size); - WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_files); + p = xdr_encode_hyper(p, (u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_fs_locations(xdr, rqstp, exp); if (status) goto out; } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(~(u64)0); + p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(255); + *p++ = cpu_to_be32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(statfs.f_namelen); + *p++ = cpu_to_be32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.mode & S_IALLUGO); + *p++ = cpu_to_be32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.nlink); + *p++ = cpu_to_be32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_user(xdr, rqstp, stat.uid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_group(xdr, rqstp, stat.gid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_RAWDEV) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32((u32) MAJOR(stat.rdev)); - WRITE32((u32) MINOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); - WRITE32(stat.atime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(1); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); - WRITE32(stat.mtime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; /* * Get parent's attributes if not ignoring crossmount * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - dentry == exp->ex_path.mnt->mnt_root) { - struct path path = exp->ex_path; - path_get(&path); - while (follow_up(&path)) { - if (path.dentry != path.mnt->mnt_root) - break; - } - err = vfs_getattr(path.mnt, path.dentry, &stat); - path_put(&path); - if (err) - goto out_nfserr; - } - WRITE64(stat.ino); + dentry == exp->ex_path.mnt->mnt_root) + get_parent_attributes(exp, &stat); + p = xdr_encode_hyper(p, stat.ino); + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + status = nfsd4_encode_security_label(xdr, rqstp, context, + contextlen); + if (status) + goto out; } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - WRITE32(3); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); - WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); } - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (context) + security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); - if (fhp == &tempfh) - fh_put(&tempfh); + if (tempfh) { + fh_put(tempfh); + kfree(tempfh); + } + if (status) + xdr_truncate_encode(xdr, starting_len); return status; out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; -out_serverfault: - status = nfserr_serverfault; - goto out; +} + +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + struct xdr_buf dummy; + struct xdr_stream xdr; + __be32 ret; + + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); + ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, + ignore_crossmnt); + *p = xdr.p; + return ret; } static inline int attributes_need_mount(u32 *bmval) @@ -2458,8 +2575,8 @@ static inline int attributes_need_mount(u32 *bmval) } static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, + const char *name, int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2511,7 +2628,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, } out_encode: - nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -2520,19 +2637,19 @@ out_put: } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - __be32 *attrlenp; + __be32 *p; - if (buflen < 6) + p = xdr_reserve_space(xdr, 20); + if (!p) return NULL; *p++ = htonl(2); *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ *p++ = htonl(0); /* bmval1 */ - attrlenp = p++; + *p++ = htonl(4); /* attribute length */ *p++ = nfserr; /* no htonl */ - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); return p; } @@ -2542,10 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, { struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); - int buflen; - __be32 *p = cd->buffer; - __be32 *cookiep; + struct xdr_stream *xdr = cd->xdr; + int start_offset = xdr->buf->len; + int cookie_offset; + int entry_bytes; __be32 nfserr = nfserr_toosmall; + __be64 wire_offset; + __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -2553,27 +2673,32 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->offset) - xdr_encode_hyper(cd->offset, (u64) offset); + if (cd->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, + &wire_offset, 8); + } - buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); - if (buflen < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto fail; - *p++ = xdr_one; /* mark entry present */ - cookiep = p; + cookie_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 3*4 + namlen); + if (!p) + goto fail; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; goto fail; case nfserr_noent: + xdr_truncate_encode(xdr, start_offset); goto skip_entry; default: /* @@ -2585,59 +2710,74 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + p = nfsd4_encode_rdattr_error(xdr, nfserr); if (p == NULL) { nfserr = nfserr_toosmall; goto fail; } } - cd->buflen -= (p - cd->buffer); - cd->buffer = p; - cd->offset = cookiep; + nfserr = nfserr_toosmall; + entry_bytes = xdr->buf->len - start_offset; + if (entry_bytes > cd->rd_maxcount) + goto fail; + cd->rd_maxcount -= entry_bytes; + if (!cd->rd_dircount) + goto fail; + cd->rd_dircount--; + cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; return 0; fail: + xdr_truncate_encode(xdr, start_offset); cd->common.err = nfserr; return -EINVAL; } -static void -nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) { __be32 *p; - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(sid->si_generation); - WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, sizeof(stateid_t)); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sid->si_generation); + p = xdr_encode_opaque_fixed(p, &sid->si_opaque, + sizeof(stateid_opaque_t)); + return 0; } static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITE32(access->ac_supported); - WRITE32(access->ac_resp_access); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); } return nfserr; } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); - WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(bcts->dir); - /* XXX: ? */ - WRITE32(0); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); + /* Sorry, we do not yet support RDMA over 4.1: */ + *p++ = cpu_to_be32(0); } return nfserr; } @@ -2645,12 +2785,11 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &close->cl_stateid); + nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2658,12 +2797,15 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITEMEM(commit->co_verf.data, 8); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + NFS4_VERIFIER_SIZE); } return nfserr; } @@ -2671,15 +2813,17 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(32); - write_cinfo(&p, &create->cr_cinfo); - WRITE32(2); - WRITE32(create->cr_bmval[0]); - WRITE32(create->cr_bmval[1]); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &create->cr_cinfo); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(create->cr_bmval[0]); + *p++ = cpu_to_be32(create->cr_bmval[1]); } return nfserr; } @@ -2688,33 +2832,31 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - int buflen; + struct xdr_stream *xdr = &resp->xdr; if (nfserr) return nfserr; - buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); - nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct xdr_stream *xdr = &resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; - RESERVE_SPACE(len + 4); - WRITE32(len); - WRITEMEM(&fhp->fh_handle.fh_base, len); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); } return nfserr; } @@ -2723,59 +2865,72 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh * Including all fields other than the name, a LOCK4denied structure requires * 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. */ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) { struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; - RESERVE_SPACE(32 + XDR_LEN(conf->len)); - WRITE64(ld->ld_start); - WRITE64(ld->ld_length); - WRITE32(ld->ld_type); +again: + p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); + if (!p) { + /* + * Don't fail to return the result just because we can't + * return the conflicting open: + */ + if (conf->len) { + kfree(conf->data); + conf->len = 0; + conf->data = NULL; + goto again; + } + return nfserr_resource; + } + p = xdr_encode_hyper(p, ld->ld_start); + p = xdr_encode_hyper(p, ld->ld_length); + *p++ = cpu_to_be32(ld->ld_type); if (conf->len) { - WRITEMEM(&ld->ld_clientid, 8); - WRITE32(conf->len); - WRITEMEM(conf->data, conf->len); + p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); + p = xdr_encode_opaque(p, conf->data, conf->len); kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - WRITE64((u64)0); /* clientid */ - WRITE32(0); /* length of owner name */ + p = xdr_encode_hyper(p, (u64)0); /* clientid */ + *p++ = cpu_to_be32(0); /* length of owner name */ } - ADJUST_ARGS(); + return nfserr_denied; } static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lock->lk_denied); + nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { + struct xdr_stream *xdr = &resp->xdr; + if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); return nfserr; } static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &locku->lu_stateid); + nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2783,12 +2938,14 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - write_cinfo(&p, &link->li_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); } return nfserr; } @@ -2797,201 +2954,336 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - ENCODE_SEQID_OP_HEAD; if (nfserr) goto out; - nfsd4_encode_stateid(resp, &open->op_stateid); - RESERVE_SPACE(40); - write_cinfo(&p, &open->op_cinfo); - WRITE32(open->op_rflags); - WRITE32(2); - WRITE32(open->op_bmval[0]); - WRITE32(open->op_bmval[1]); - WRITE32(open->op_delegate_type); - ADJUST_ARGS(); + nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); + if (nfserr) + goto out; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &open->op_cinfo); + *p++ = cpu_to_be32(open->op_rflags); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(open->op_bmval[0]); + *p++ = cpu_to_be32(open->op_bmval[1]); + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(20); - WRITE32(open->op_recall); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_recall); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: - nfsd4_encode_stateid(resp, &open->op_delegate_stateid); - RESERVE_SPACE(32); - WRITE32(0); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* * TODO: space_limit's in delegations */ - WRITE32(NFS4_LIMIT_SIZE); - WRITE32(~(u32)0); - WRITE32(~(u32)0); + *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); + *p++ = cpu_to_be32(~(u32)0); + *p++ = cpu_to_be32(~(u32)0); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ + break; + case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ + switch (open->op_why_no_deleg) { + case WND4_CONTENTION: + case WND4_RESOURCE: + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); + /* deleg signaling not supported yet: */ + *p++ = cpu_to_be32(0); + break; + default: + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); + } break; default: BUG(); } /* XXX save filehandle here */ out: - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); + nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) - nfsd4_encode_stateid(resp, &od->od_stateid); + nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - encode_seqid_op_tail(resp, save, nfserr); return nfserr; } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( + struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) { + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = xdr->buf; u32 eof; - int v, pn; - unsigned long maxcount; - long len; - __be32 *p; + int space_left; + __be32 nfserr; + __be32 *p = xdr->p - 2; - if (nfserr) - return nfserr; - if (resp->xbuf->page_len) + /* + * Don't inline pages unless we know there's room for eof, + * count, and possible padding: + */ + if (xdr->end - xdr->p < 3) return nfserr_resource; - RESERVE_SPACE(8); /* eof flag and byte count */ + nfserr = nfsd_splice_read(read->rd_rqstp, file, + read->rd_offset, &maxcount); + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + buf->page_len = 0; + return nfserr; + } - maxcount = svc_max_payload(resp->rqstp); - if (maxcount > read->rd_length) - maxcount = read->rd_length; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); + + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); + + buf->page_len = maxcount; + buf->len += maxcount; + xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Use rest of head for padding and remaining ops: */ + buf->tail[0].iov_base = xdr->p; + buf->tail[0].iov_len = 0; + xdr->iov = buf->tail; + if (maxcount&3) { + int pad = 4 - (maxcount&3); + + *(xdr->p++) = 0; + + buf->tail[0].iov_base += maxcount&3; + buf->tail[0].iov_len = pad; + buf->len += pad; + } + + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + buf->buflen - buf->len); + buf->buflen = buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + + return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int v; + int starting_len = xdr->buf->len - 8; + long len; + int thislen; + __be32 nfserr; + __be32 tmp; + __be32 *p; + u32 zzz = 0; + int pad; len = maxcount; v = 0; - while (len > 0) { - pn = resp->rqstp->rq_resused++; - resp->rqstp->rq_vec[v].iov_base = - page_address(resp->rqstp->rq_respages[pn]); - resp->rqstp->rq_vec[v].iov_len = - len < PAGE_SIZE ? len : PAGE_SIZE; + + thislen = (void *)xdr->end - (void *)xdr->p; + if (len < thislen) + thislen = len; + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; + v++; + len -= thislen; + + while (len) { + thislen = min_t(long, len, PAGE_SIZE); + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; v++; - len -= PAGE_SIZE; + len -= thislen; } read->rd_vlen = v; - nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, - &maxcount); - + nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, + read->rd_vlen, &maxcount); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - WRITE32(eof); - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); - } + pad = (maxcount&3) ? 4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &zzz, pad); return 0; + } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) { - int maxcount; - char *page; + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_filp; + int starting_len = xdr->buf->len; + struct raparms *ra; __be32 *p; + __be32 err; if (nfserr) return nfserr; - if (resp->xbuf->page_len) + + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } + if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { + WARN_ON_ONCE(1); + return nfserr_resource; + } + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > xdr->buf->buflen - xdr->buf->len) + maxcount = xdr->buf->buflen - xdr->buf->len; + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + if (!read->rd_filp) { + err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, + &file, &ra); + if (err) + goto err_truncate; + } + + if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) + err = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + err = nfsd4_encode_readv(resp, read, file, maxcount); + + if (!read->rd_filp) + nfsd_put_tmp_read_open(file, ra); + +err_truncate: + if (err) + xdr_truncate_encode(xdr, starting_len); + return err; +} - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +{ + int maxcount; + __be32 wire_count; + int zero = 0; + struct xdr_stream *xdr = &resp->xdr; + int length_offset = xdr->buf->len; + __be32 *p; + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; maxcount = PAGE_SIZE; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, maxcount); + if (!p) + return nfserr_resource; /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If * not, one easy fix is: if ->readlink() precisely fills the buffer, * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ - nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, + (char *)p, &maxcount); if (nfserr == nfserr_isdir) - return nfserr_inval; - if (nfserr) + nfserr = nfserr_inval; + if (nfserr) { + xdr_truncate_encode(xdr, length_offset); return nfserr; - - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } + + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); return 0; } @@ -2999,45 +3291,52 @@ static __be32 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; + int bytes_left; loff_t offset; - __be32 *page, *savep, *tailbase; + __be64 wire_offset; + struct xdr_stream *xdr = &resp->xdr; + int starting_len = xdr->buf->len; __be32 *p; if (nfserr) return nfserr; - if (resp->xbuf->page_len) - return nfserr_resource; - RESERVE_SPACE(8); /* verifier */ - savep = p; + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; - tailbase = p; - - maxcount = PAGE_SIZE; - if (maxcount > readdir->rd_maxcount) - maxcount = readdir->rd_maxcount; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; /* - * Convert from bytes to words, account for the two words already - * written, make sure to leave two words at the end for the next - * pointer and eof field. + * Number of bytes left for directory entries allowing for the + * final 8 bytes of the readdir and a following failed op: */ - maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) { - nfserr = nfserr_toosmall; + bytes_left = xdr->buf->buflen - xdr->buf->len + - COMPOUND_ERR_SLACK_SPACE - 8; + if (bytes_left < 0) { + nfserr = nfserr_resource; goto err_no_verf; } + maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); + /* + * Note the rfc defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier above + * and the 8 bytes encoded at the end of this function: + */ + if (maxcount < 16) { + nfserr = nfserr_toosmall; + goto err_no_verf; + } + maxcount = min_t(int, maxcount-16, bytes_left); - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + readdir->xdr = xdr; + readdir->rd_maxcount = maxcount; readdir->common.err = 0; - readdir->buflen = maxcount; - readdir->buffer = page; - readdir->offset = NULL; + readdir->cookie_offset = 0; offset = readdir->rd_cookie; nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -3045,42 +3344,49 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && readdir->common.err == nfserr_toosmall && - readdir->buffer == page) - nfserr = nfserr_toosmall; + xdr->buf->len == starting_len + 8) { + /* nothing encoded; which limit did we hit?: */ + if (maxcount - 16 < bytes_left) + /* It was the fault of rd_maxcount: */ + nfserr = nfserr_toosmall; + else + /* We ran out of buffer space: */ + nfserr = nfserr_resource; + } if (nfserr) goto err_no_verf; - if (readdir->offset) - xdr_encode_hyper(readdir->offset, offset); + if (readdir->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, + &wire_offset, 8); + } - p = readdir->buffer; + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + goto err_no_verf; + } *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - (char*)page_address( - resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = tailbase; - resp->xbuf->tail[0].iov_len = 0; - resp->p = resp->xbuf->tail[0].iov_base; - resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; return 0; err_no_verf: - p = savep; - ADJUST_ARGS(); + xdr_truncate_encode(xdr, starting_len); return nfserr; } static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - write_cinfo(&p, &remove->rm_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); } return nfserr; } @@ -3088,29 +3394,32 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(40); - write_cinfo(&p, &rename->rn_sinfo); - write_cinfo(&p, &rename->rn_tinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); } return nfserr; } static __be32 -nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, - __be32 nfserr,struct svc_export *exp) +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, + __be32 nfserr, struct svc_export *exp) { - int i = 0; - u32 nflavs; + u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - __be32 *p; + __be32 *p, *flavorsp; + static bool report = true; if (nfserr) goto out; + nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3131,34 +3440,43 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, } } - RESERVE_SPACE(4); - WRITE32(nflavs); - ADJUST_ARGS(); + supported = 0; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + flavorsp = p++; /* to be backfilled later */ + for (i = 0; i < nflavs; i++) { - u32 flav = flavs[i].pseudoflavor; - struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); - - if (gm) { - RESERVE_SPACE(4); - WRITE32(RPC_AUTH_GSS); - ADJUST_ARGS(); - RESERVE_SPACE(4 + gm->gm_oid.len); - WRITE32(gm->gm_oid.len); - WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(0); /* qop */ - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(gss_pseudoflavor_to_service(gm, flav)); - ADJUST_ARGS(); - gss_mech_put(gm); + rpc_authflavor_t pf = flavs[i].pseudoflavor; + struct rpcsec_gss_info info; + + if (rpcauth_get_gssinfo(pf, &info) == 0) { + supported++; + p = xdr_reserve_space(xdr, 4 + 4 + + XDR_LEN(info.oid.len) + 4 + 4); + if (!p) + goto out; + *p++ = cpu_to_be32(RPC_AUTH_GSS); + p = xdr_encode_opaque(p, info.oid.data, info.oid.len); + *p++ = cpu_to_be32(info.qop); + *p++ = cpu_to_be32(info.service); + } else if (pf < RPC_AUTH_MAXFLAVOR) { + supported++; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + *p++ = cpu_to_be32(pf); } else { - RESERVE_SPACE(4); - WRITE32(flav); - ADJUST_ARGS(); + if (report) + pr_warn("NFS: SECINFO: security flavor %u " + "is not supported\n", pf); } } + + if (nflavs != supported) + report = false; + *flavorsp = htonl(supported); + nfserr = 0; out: if (exp) exp_put(exp); @@ -3169,14 +3487,18 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); } static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); } /* @@ -3186,39 +3508,47 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; - RESERVE_SPACE(12); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; if (nfserr) { - WRITE32(2); - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } else { - WRITE32(2); - WRITE32(setattr->sa_bmval[0]); - WRITE32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(setattr->sa_bmval[0]); + *p++ = cpu_to_be32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(setattr->sa_bmval[2]); } - ADJUST_ARGS(); return nfserr; } static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + sizeof(nfs4_verifier)); - WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier)); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); + p = xdr_encode_opaque_fixed(p, &scd->se_confirm, + NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { - RESERVE_SPACE(8); - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } return nfserr; } @@ -3226,22 +3556,34 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { - RESERVE_SPACE(16); - WRITE32(write->wr_bytes_written); - WRITE32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, 8); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); } return nfserr; } +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + static __be32 -nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -3257,163 +3599,165 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, server_scope = utsname()->nodename; server_scope_sz = strlen(server_scope); - RESERVE_SPACE( + p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + 4 /* eir_sequenceid */ + 4 /* eir_flags */ + - 4 /* spr_how (SP4_NONE) */ + + 4 /* spr_how */); + if (!p) + return nfserr_resource; + + p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); + *p++ = cpu_to_be32(exid->seqid); + *p++ = cpu_to_be32(exid->flags); + + *p++ = cpu_to_be32(exid->spa_how); + + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce, spo_must_allow */ + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + + /* spo_must_enforce bitmap: */ + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + *p++ = cpu_to_be32(0); + + break; + default: + WARN_ON_ONCE(1); + } + + p = xdr_reserve_space(xdr, 8 /* so_minor_id */ + 4 /* so_major_id.len */ + (XDR_QUADLEN(major_id_sz) * 4) + 4 /* eir_server_scope.len */ + (XDR_QUADLEN(server_scope_sz) * 4) + 4 /* eir_server_impl_id.count (0) */); - - WRITEMEM(&exid->clientid, 8); - WRITE32(exid->seqid); - WRITE32(exid->flags); - - /* state_protect4_r. Currently only support SP4_NONE */ - BUG_ON(exid->spa_how != SP4_NONE); - WRITE32(exid->spa_how); + if (!p) + return nfserr_resource; /* The server_owner struct */ - WRITE64(minor_id); /* Minor id */ + p = xdr_encode_hyper(p, minor_id); /* Minor id */ /* major id */ - WRITE32(major_id_sz); - WRITEMEM(major_id, major_id_sz); + p = xdr_encode_opaque(p, major_id, major_id_sz); /* Server scope */ - WRITE32(server_scope_sz); - WRITEMEM(server_scope, server_scope_sz); + p = xdr_encode_opaque(p, server_scope, server_scope_sz); /* Implementation id */ - WRITE32(0); /* zero length nfs_impl_id4 array */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; } static __be32 -nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, +nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(24); - WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(sess->seqid); - WRITE32(sess->flags); - ADJUST_ARGS(); - - RESERVE_SPACE(28); - WRITE32(0); /* headerpadsz */ - WRITE32(sess->fore_channel.maxreq_sz); - WRITE32(sess->fore_channel.maxresp_sz); - WRITE32(sess->fore_channel.maxresp_cached); - WRITE32(sess->fore_channel.maxops); - WRITE32(sess->fore_channel.maxreqs); - WRITE32(sess->fore_channel.nr_rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 24); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, sess->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(sess->seqid); + *p++ = cpu_to_be32(sess->flags); + + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->fore_channel.maxops); + *p++ = cpu_to_be32(sess->fore_channel.maxreqs); + *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); if (sess->fore_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); - WRITE32(sess->fore_channel.rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); } - RESERVE_SPACE(28); - WRITE32(0); /* headerpadsz */ - WRITE32(sess->back_channel.maxreq_sz); - WRITE32(sess->back_channel.maxresp_sz); - WRITE32(sess->back_channel.maxresp_cached); - WRITE32(sess->back_channel.maxops); - WRITE32(sess->back_channel.maxreqs); - WRITE32(sess->back_channel.nr_rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->back_channel.maxops); + *p++ = cpu_to_be32(sess->back_channel.maxreqs); + *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); if (sess->back_channel.nr_rdma_attrs) { - RESERVE_SPACE(4); - WRITE32(sess->back_channel.rdma_attrs); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); } return 0; } static __be32 -nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, - struct nfsd4_destroy_session *destroy_session) -{ - return nfserr; -} - -static __be32 -nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, - struct nfsd4_free_stateid *free_stateid) -{ - __be32 *p; - - if (nfserr) - return nfserr; - - RESERVE_SPACE(4); - WRITE32(nfserr); - ADJUST_ARGS(); - return nfserr; -} - -static __be32 -nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, +nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) return nfserr; - RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); - WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - WRITE32(seq->seqid); - WRITE32(seq->slotid); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, seq->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(seq->seqid); + *p++ = cpu_to_be32(seq->slotid); /* Note slotid's are numbered from zero: */ - WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ - WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ - WRITE32(seq->status_flags); + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ + *p++ = cpu_to_be32(seq->status_flags); - ADJUST_ARGS(); - resp->cstate.datap = p; /* DRC cache data pointer */ + resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ return 0; } -__be32 -nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, +static __be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { - struct nfsd4_compoundargs *argp; - struct nfs4_client *cl = resp->cstate.session->se_client; - stateid_t si; + struct xdr_stream *xdr = &resp->xdr; + struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - int i; - int valid; - restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp); - argp = test_stateid->ts_saved_args; + if (nfserr) + return nfserr; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); + if (!p) + return nfserr_resource; *p++ = htonl(test_stateid->ts_num_ids); - resp->p = p; - nfs4_lock_state(); - for (i = 0; i < test_stateid->ts_num_ids; i++) { - nfsd4_decode_stateid(argp, &si); - valid = nfs4_validate_stateid(cl, &si); - RESERVE_SPACE(4); - *p++ = htonl(valid); - resp->p = p; + list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { + *p++ = stateid->ts_id_status; } - nfs4_unlock_state(); return nfserr; } @@ -3475,8 +3819,8 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, @@ -3493,77 +3837,99 @@ static nfsd4_enc nfsd4_enc_ops[] = { }; /* - * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation with pad. - * - * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() - * which was specified at nfsd4_operation, else pad is zero. - * - * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. + * Calculate whether we still have space to encode repsize bytes. + * There are two considerations: + * - For NFS versions >=4.1, the size of the reply must stay within + * session limits + * - For all NFS versions, we must stay within limited preallocated + * buffer space. * - * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so - * will be at least a page and will therefore hold the xdr_buf head. + * This is called before the operation is processed, so can only provide + * an upper estimate. For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects. */ -int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) { - struct xdr_buf *xb = &resp->rqstp->rq_res; - struct nfsd4_session *session = NULL; + struct xdr_buf *buf = &resp->rqstp->rq_res; struct nfsd4_slot *slot = resp->cstate.slot; - u32 length, tlen = 0; + if (buf->len + respsize <= buf->buflen) + return nfs_ok; if (!nfsd4_has_session(&resp->cstate)) - return 0; - - session = resp->cstate.session; - if (session == NULL) - return 0; - - if (xb->page_len == 0) { - length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; - } else { - if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) - tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; - - length = xb->head[0].iov_len + xb->page_len + tlen + pad; - } - dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, - length, xb->page_len, tlen, pad); - - if (length > session->se_fchannel.maxresp_sz) - return nfserr_rep_too_big; - - if (slot->sl_cachethis == 1 && - length > session->se_fchannel.maxresp_cached) + return nfserr_resource; + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { + WARN_ON_ONCE(1); return nfserr_rep_too_big_to_cache; - - return 0; + } + return nfserr_rep_too_big; } void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - __be32 *statp; + struct xdr_stream *xdr = &resp->xdr; + struct nfs4_stateowner *so = resp->cstate.replay_owner; + struct svc_rqst *rqstp = resp->rqstp; + int post_err_offset; + nfsd4_enc encoder; __be32 *p; - RESERVE_SPACE(8); - WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); + post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); - op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); - /* nfsd4_check_drc_limit guarantees enough room for error status */ - if (!op->status) - op->status = nfsd4_check_resp_size(resp, 0); + encoder = nfsd4_enc_ops[op->opnum]; + op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + + /* nfsd4_check_resp_size guarantees enough room for error status */ + if (!op->status) { + int space_needed = 0; + if (!nfsd4_last_compound_op(rqstp)) + space_needed = COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, space_needed); + } + if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; + + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) + op->status = nfserr_rep_too_big_to_cache; + else + op->status = nfserr_rep_too_big; + } + if (op->status == nfserr_resource || + op->status == nfserr_rep_too_big || + op->status == nfserr_rep_too_big_to_cache) { + /* + * The operation may have already been encoded or + * partially encoded. No op returns anything additional + * in the case of one of these three errors, so we can + * just truncate back to after the status. But it's a + * bug if we had to do this on a non-idempotent op: + */ + warn_on_nonidempotent_op(op); + xdr_truncate_encode(xdr, post_err_offset); + } + if (so) { + int len = xdr->buf->len - post_err_offset; + + so->so_replay.rp_status = op->status; + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + so->so_replay.rp_buf, len); + } status: - /* - * Note: We write the status directly, instead of using WRITE32(), - * since it is already in network byte order. - */ - *statp = op->status; + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* @@ -3575,21 +3941,22 @@ status: * called with nfs4_lock_state() held */ void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); - RESERVE_SPACE(8); - WRITE32(op->opnum); + p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - ADJUST_ARGS(); - RESERVE_SPACE(rp->rp_buflen); - WRITEMEM(rp->rp_buf, rp->rp_buflen); - ADJUST_ARGS(); + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } int @@ -3621,6 +3988,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { + if (rqstp->rq_arg.head[0].iov_len % 4) { + /* client is nuts */ + dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", + __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); + return 0; + } args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; @@ -3640,28 +4013,31 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo * All that remains is to write the tag and operation count... */ struct nfsd4_compound_state *cs = &resp->cstate; - struct kvec *iov; + struct xdr_buf *buf = resp->xdr.buf; + + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); + + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (rqstp->rq_res.page_len) - iov = &rqstp->rq_res.tail[0]; - else - iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; - BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - cs->slot->sl_inuse = false; + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - release_session_client(cs->session); + spin_lock(&nn->client_lock); nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); } return 1; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2cbac34a55d..6040da8830f 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,34 +9,66 @@ */ #include <linux/slab.h> +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <linux/log2.h> +#include <linux/hash.h> +#include <net/checksum.h> #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + +/* + * We use this value to determine the number of hash buckets from the max + * cache size, the idea being that when the cache is at its maximum number + * of entries, then this should be the average number of entries per bucket. */ -#define CACHESIZE 1024 -#define HASHSIZE 64 +#define TARGET_BUCKET_SIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; + +/* max number of entries allowed in the cache */ +static unsigned int max_drc_entries; + +/* number of significant bits in the hash value */ +static unsigned int maskbits; /* - * Calculate the hash index from an XID. + * Stats and other tracking of on the duplicate reply cache. All of these and + * the "rc" fields in nfsdstats are protected by the cache_lock */ -static inline u32 request_hash(u32 xid) -{ - u32 h = xid; - h ^= (xid >> 24); - return h & (HASHSIZE-1); -} + +/* total number of entries */ +static unsigned int num_drc_entries; + +/* cache misses due only to checksum comparison failures */ +static unsigned int payload_misses; + +/* amount of memory (in bytes) currently consumed by the DRC */ +static unsigned int drc_mem_usage; + +/* longest hash chain seen */ +static unsigned int longest_chain; + +/* size of cache when we saw the longest hash chain */ +static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); + +static struct shrinker nfsd_reply_cache_shrinker = { + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, + .seeks = 1, +}; /* * locking for the reply cache: @@ -44,30 +76,104 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +/* + * Compute the number of hash buckets we need. Divide the max cachesize by + * the "target" max bucket size, and round up to next power of two. + */ +static unsigned int +nfsd_hashsize(unsigned int limit) +{ + return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} + +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { + drc_mem_usage -= rp->c_replvec.iov_len; + kfree(rp->c_replvec.iov_base); + } + if (!hlist_unhashed(&rp->c_hash)) + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + drc_mem_usage -= sizeof(*rp); + kmem_cache_free(drc_slab, rp); +} - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + unsigned int hashsize; + + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + hashsize = nfsd_hashsize(max_drc_entries); + maskbits = ilog2(hashsize); + + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; + + cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; - cache_disabled = 0; return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -79,27 +185,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -109,89 +221,236 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + long freed = 0; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) + break; + nfsd_reply_cache_free_locked(rp); + freed++; + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long num; + + spin_lock(&cache_lock); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} +/* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +static bool +nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) +{ + /* Check RPC header info first */ + if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + + /* compare checksum of NFS data */ + if (csum != rp->c_csum) { + ++payload_misses; + return false; + } + + return true; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp, *ret = NULL; + struct hlist_head *rh; + unsigned int entries = 0; + + rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + hlist_for_each_entry(rp, rh, c_hash) { + ++entries; + if (nfsd_cache_match(rqstp, csum, rp)) { + ret = rp; + break; + } + } + + /* tally hash chain length stats */ + if (entries > longest_chain) { + longest_chain = entries; + longest_chain_cachesize = num_drc_entries; + } else if (entries == longest_chain) { + /* prefer to keep the smallest cachesize possible here */ + longest_chain_cachesize = min(longest_chain_cachesize, + num_drc_entries); + } + + return ret; } /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; - int rtn; + int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return RC_DOIT; + return rtn; } - spin_lock(&cache_lock); - rtn = RC_DOIT; + csum = nfsd_cache_csum(rqstp); - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; - } + /* + * Since the common case is a cache miss followed by an insert, + * preallocate an entry. + */ + rp = nfsd_reply_cache_alloc(); + spin_lock(&cache_lock); + if (likely(rp)) { + ++num_drc_entries; + drc_mem_usage += sizeof(*rp); } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } - } - } + /* go ahead and prune the cache */ + prune_cache_entries(); - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; + found = nfsd_cache_search(rqstp, csum); + if (found) { + if (likely(rp)) + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; + } - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); goto out; } + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { + drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); rp->c_replvec.iov_base = NULL; } @@ -201,9 +460,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -232,7 +491,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -257,11 +516,12 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; + size_t bufsize = 0; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); @@ -269,7 +529,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -281,23 +541,25 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) break; case RC_REPLBUFF: cachv = &rp->c_replvec; - cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + bufsize = len << 2; + cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } - cachv->iov_len = len << 2; - memcpy(cachv->iov_base, statp, len << 2); + cachv->iov_len = bufsize; + memcpy(cachv->iov_base, statp, bufsize); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); + drc_mem_usage += bufsize; lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } @@ -321,3 +583,30 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) vec->iov_len += data->iov_len; return 1; } + +/* + * Note that fields may be added, removed or reordered in the future. Programs + * scraping this file for info should test the labels to ensure they're + * getting the correct field. + */ +static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +{ + spin_lock(&cache_lock); + seq_printf(m, "max entries: %u\n", max_drc_entries); + seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "hash buckets: %u\n", 1 << maskbits); + seq_printf(m, "mem usage: %u\n", drc_mem_usage); + seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); + seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); + seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); + seq_printf(m, "payload misses: %u\n", payload_misses); + seq_printf(m, "longest chain len: %u\n", longest_chain); + seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + spin_unlock(&cache_lock); + return 0; +} + +int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_reply_cache_stats_show, NULL); +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 748eda93ce5..51844048937 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,15 +10,17 @@ #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> #include "idmap.h" #include "nfsd.h" #include "cache.h" -#include "fault_inject.h" +#include "state.h" +#include "netns.h" /* * We have a single directory with several nodes in it. @@ -33,6 +35,7 @@ enum { NFSD_Threads, NFSD_Pool_Threads, NFSD_Pool_Stats, + NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -83,7 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { - ino_t ino = file->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; @@ -123,13 +126,41 @@ static const struct file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { - return seq_open(file, &nfs_exports_op); + int err; + struct seq_file *seq; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + err = seq_open(file, &nfs_exports_op); + if (err) + return err; + + seq = file->private_data; + seq->private = nn->svc_export_cache; + return 0; +} + +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); } -static const struct file_operations exports_operations = { - .open = exports_open, +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -147,7 +178,7 @@ static int export_features_open(struct inode *inode, struct file *file) return single_open(file, export_features_show, NULL); } -static struct file_operations export_features_operations = { +static const struct file_operations export_features_operations = { .open = export_features_open, .read = seq_read, .llseek = seq_lseek, @@ -166,7 +197,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file) return single_open(file, supported_enctypes_show, NULL); } -static struct file_operations supported_enctypes_ops = { +static const struct file_operations supported_enctypes_ops = { .open = supported_enctypes_open, .read = seq_read, .llseek = seq_lseek, @@ -174,9 +205,6 @@ static struct file_operations supported_enctypes_ops = { }; #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ -extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); -extern int nfsd_pool_stats_release(struct inode *inode, struct file *file); - static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, @@ -185,6 +213,13 @@ static const struct file_operations pool_stats_operations = { .owner = THIS_MODULE, }; +static struct file_operations reply_cache_stats_operations = { + .open = nfsd_reply_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -211,6 +246,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -223,7 +259,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -308,6 +344,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -343,7 +380,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -387,6 +424,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; + if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); @@ -394,11 +433,11 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return rv; if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(NFS_PORT, newthreads); + rv = nfsd_svc(newthreads, net); if (rv < 0) return rv; } else - rv = nfsd_nrthreads(); + rv = nfsd_nrthreads(net); return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } @@ -436,9 +475,10 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); - npools = nfsd_nrpools(); + npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. The admin can start it by @@ -466,12 +506,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) if (nthreads[i] < 0) goto out_free; } - rv = nfsd_set_nrthreads(i, nthreads); + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } - rv = nfsd_get_nrthreads(npools, nthreads); + rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; @@ -498,11 +538,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { - if (nfsd_serv) + if (nn->nfsd_serv) /* Cannot change versions without updating - * nfsd_serv->sv_xdrsize, and reallocing + * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; @@ -520,7 +562,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) @@ -633,11 +675,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) * Zero-length write. Return a list of NFSD's current listener * transports. */ -static ssize_t __write_ports_names(char *buf) +static ssize_t __write_ports_names(char *buf, struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; - return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); + return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); } /* @@ -645,134 +689,92 @@ static ssize_t __write_ports_names(char *buf) * a socket of a supported family/protocol, and we use it as an * nfsd listener. */ -static ssize_t __write_ports_addfd(char *buf) +static ssize_t __write_ports_addfd(char *buf, struct net *net) { char *mesg = buf; int fd, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; - err = nfsd_create_serv(); + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return err; } /* - * A '-' followed by the 'name' of a socket means we close the socket. - */ -static ssize_t __write_ports_delfd(char *buf) -{ - char *toclose; - int len = 0; - - toclose = kstrdup(buf + 1, GFP_KERNEL); - if (toclose == NULL) - return -ENOMEM; - - if (nfsd_serv != NULL) - len = svc_sock_names(nfsd_serv, buf, - SIMPLE_TRANSACTION_LIMIT, toclose); - kfree(toclose); - return len; -} - -/* * A transport listener is added by writing it's transport name and * a port number. */ -static ssize_t __write_ports_addxprt(char *buf) +static ssize_t __write_ports_addxprt(char *buf, struct net *net) { char transport[16]; struct svc_xprt *xprt; int port, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (sscanf(buf, "%15s %4u", transport, &port) != 2) + if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; if (port < 1 || port > USHRT_MAX) return -EINVAL; - err = nfsd_create_serv(); + err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_create_xprt(nfsd_serv, transport, &init_net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET, port, SVC_SOCK_ANONYMOUS); if (err < 0) goto out_err; - err = svc_create_xprt(nfsd_serv, transport, &init_net, + err = svc_create_xprt(nn->nfsd_serv, transport, net, PF_INET6, port, SVC_SOCK_ANONYMOUS); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; /* Decrease the count, but don't shut down the service */ - nfsd_serv->sv_nrthreads--; + nn->nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); + xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); } out_err: - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } -/* - * A transport listener is removed by writing a "-", it's transport - * name, and it's port number. - */ -static ssize_t __write_ports_delxprt(char *buf) -{ - struct svc_xprt *xprt; - char transport[16]; - int port; - - if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) - return -EINVAL; - - if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) - return -EINVAL; - - xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); - if (xprt == NULL) - return -ENOTCONN; - - svc_close_xprt(xprt); - svc_xprt_put(xprt); - return 0; -} - -static ssize_t __write_ports(struct file *file, char *buf, size_t size) +static ssize_t __write_ports(struct file *file, char *buf, size_t size, + struct net *net) { if (size == 0) - return __write_ports_names(buf); + return __write_ports_names(buf, net); if (isdigit(buf[0])) - return __write_ports_addfd(buf); - - if (buf[0] == '-' && isdigit(buf[1])) - return __write_ports_delfd(buf); + return __write_ports_addfd(buf, net); if (isalpha(buf[0])) - return __write_ports_addxprt(buf); - - if (buf[0] == '-' && isalpha(buf[1])) - return __write_ports_delxprt(buf); + return __write_ports_addxprt(buf, net); return -EINVAL; } @@ -811,21 +813,6 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) * OR * * Input: - * buf: C string containing a "-" followed - * by an integer value representing a - * previously passed in socket file - * descriptor - * size: non-zero length of C string in @buf - * Output: - * On success: NFS service no longer listens on that socket; - * passed-in buffer filled with a '\n'-terminated C - * string containing a unique name of the listener; - * return code is the size in bytes of the string - * On error: return code is a negative errno value - * - * OR - * - * Input: * buf: C string containing a transport * name and an unsigned integer value * representing the port to listen on, @@ -834,26 +821,14 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) * Output: * On success: returns zero; NFS service is started * On error: return code is a negative errno value - * - * OR - * - * Input: - * buf: C string containing a "-" followed - * by a transport name and an unsigned - * integer value representing the port - * to listen on, separated by whitespace - * size: non-zero length of C string in @buf - * Output: - * On success: returns zero; NFS service no longer listens - * on that transport - * On error: return code is a negative errno value */ static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); - rv = __write_ports(file, buf, size); + rv = __write_ports(file, buf, size, net); mutex_unlock(&nfsd_mutex); return rv; } @@ -885,6 +860,9 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); @@ -899,7 +877,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); - if (nfsd_serv) { + if (nn->nfsd_serv) { mutex_unlock(&nfsd_mutex); return -EBUSY; } @@ -912,13 +890,14 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) } #ifdef CONFIG_NFSD_V4 -static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; rv = get_int(&mesg, &i); if (rv) @@ -943,12 +922,13 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, tim return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); } -static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time) +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { ssize_t rv; mutex_lock(&nfsd_mutex); - rv = __nfsd4_write_time(file, buf, size, time); + rv = __nfsd4_write_time(file, buf, size, time, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -976,7 +956,9 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_ */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_lease); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } /** @@ -991,19 +973,20 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - return nfsd4_write_time(file, buf, size, &nfsd4_grace); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } -extern char *nfs4_recoverydir(void); - -static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, + struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; if (size > 0) { - if (nfsd_serv) + if (nn->nfsd_serv) return -EBUSY; if (size > PATH_MAX || buf[size-1] != '\n') return -EINVAL; @@ -1047,9 +1030,11 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); - rv = __write_recoverydir(file, buf, size); + rv = __write_recoverydir(file, buf, size, nn); mutex_unlock(&nfsd_mutex); return rv; } @@ -1064,7 +1049,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1075,6 +1060,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, @@ -1088,21 +1074,37 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) @@ -1112,9 +1114,12 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); - if (!entry) + entry = proc_create("exports", 0, entry, + &exports_proc_operations); + if (!entry) { + remove_proc_entry("fs/nfs", NULL); return -ENOMEM; + } return 0; } #else /* CONFIG_PROC_FS */ @@ -1124,15 +1129,56 @@ static int create_proc_exports_entry(void) } #endif +int nfsd_net_id; + +static __net_init int nfsd_init_net(struct net *net) +{ + int retval; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + retval = nfsd_export_init(net); + if (retval) + goto out_export_error; + retval = nfsd_idmap_init(net); + if (retval) + goto out_idmap_error; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + return 0; + +out_idmap_error: + nfsd_export_shutdown(net); +out_export_error: + return retval; +} + +static __net_exit void nfsd_exit_net(struct net *net) +{ + nfsd_idmap_shutdown(net); + nfsd_export_shutdown(net); +} + +static struct pernet_operations nfsd_net_ops = { + .init = nfsd_init_net, + .exit = nfsd_exit_net, + .id = &nfsd_net_id, + .size = sizeof(struct nfsd_net), +}; + static int __init init_nfsd(void) { int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfsd4_init_slabs(); + retval = register_cld_notifier(); if (retval) return retval; - nfs4_state_init(); + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) + goto out_unregister_notifier; + retval = nfsd4_init_slabs(); + if (retval) + goto out_unregister_pernet; retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) goto out_free_slabs; @@ -1140,16 +1186,10 @@ static int __init init_nfsd(void) retval = nfsd_reply_cache_init(); if (retval) goto out_free_stat; - retval = nfsd_export_init(); - if (retval) - goto out_free_cache; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = nfsd_idmap_init(); - if (retval) - goto out_free_lockd; retval = create_proc_exports_entry(); if (retval) - goto out_free_idmap; + goto out_free_lockd; retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; @@ -1157,33 +1197,33 @@ static int __init init_nfsd(void) out_free_all: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); -out_free_idmap: - nfsd_idmap_shutdown(); out_free_lockd: nfsd_lockd_shutdown(); - nfsd_export_shutdown(); -out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); out_free_slabs: nfsd4_free_slabs(); +out_unregister_pernet: + unregister_pernet_subsys(&nfsd_net_ops); +out_unregister_notifier: + unregister_cld_notifier(); return retval; } static void __exit exit_nfsd(void) { - nfsd_export_shutdown(); nfsd_reply_cache_shutdown(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); - nfsd_idmap_shutdown(); nfsd4_free_slabs(); nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); + unregister_pernet_subsys(&nfsd_net_ops); + unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1d1e8589b4c..847daf37e56 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -15,16 +15,25 @@ #include <linux/nfs2.h> #include <linux/nfs3.h> #include <linux/nfs4.h> +#include <linux/sunrpc/svc.h> #include <linux/sunrpc/msg_prot.h> -#include <linux/nfsd/debug.h> -#include <linux/nfsd/export.h> -#include <linux/nfsd/stats.h> +#include <uapi/linux/nfsd/debug.h> + +#include "stats.h" +#include "export.h" + +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 +#define NFSD_SUPPORTED_MINOR_VERSION 2 /* * Maximum blocksizes supported by daemon under various circumstances. */ @@ -53,25 +62,27 @@ struct readdir_cd { extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; -extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; -extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_mem; -extern unsigned int nfsd_drc_mem_used; +extern unsigned long nfsd_drc_max_mem; +extern unsigned long nfsd_drc_mem_used; extern const struct seq_operations nfs_exports_op; /* * Function prototypes. */ -int nfsd_svc(unsigned short port, int nrservs); +int nfsd_svc(int nrservs, struct net *net); int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); -int nfsd_nrthreads(void); -int nfsd_nrpools(void); -int nfsd_get_nrthreads(int n, int *); -int nfsd_set_nrthreads(int n, int *); +int nfsd_nrthreads(struct net *); +int nfsd_nrpools(struct net *); +int nfsd_get_nrthreads(int n, int *, struct net *); +int nfsd_set_nrthreads(int n, int *, struct net *); +int nfsd_pool_stats_open(struct inode *, struct file *); +int nfsd_pool_stats_release(struct inode *, struct file *); + +void nfsd_destroy(struct net *net); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL @@ -90,7 +101,7 @@ enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; int nfsd_vers(int vers, enum vers_op change); int nfsd_minorversion(u32 minorversion, enum vers_op change); void nfsd_reset_versions(void); -int nfsd_create_serv(void); +int nfsd_create_serv(struct net *net); extern int nfsd_max_blksize; @@ -103,22 +114,26 @@ static inline int nfsd_v4client(struct svc_rqst *rq) * NFSv4 State */ #ifdef CONFIG_NFSD_V4 -extern unsigned int max_delegations; -void nfs4_state_init(void); +extern unsigned long max_delegations; int nfsd4_init_slabs(void); void nfsd4_free_slabs(void); int nfs4_state_start(void); +int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); +void nfs4_state_shutdown_net(struct net *net); void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); +char * nfs4_recoverydir(void); #else -static inline void nfs4_state_init(void) { } static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } static inline int nfs4_state_start(void) { return 0; } +static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } +static inline void nfs4_state_shutdown_net(struct net *net) { } static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } +static inline char * nfs4_recoverydir(void) {return NULL; } #endif /* @@ -234,6 +249,12 @@ void nfsd_lockd_shutdown(void); #define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) #define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) +#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) +#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) +#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) +#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) +#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. @@ -250,16 +271,8 @@ void nfsd_lockd_shutdown(void); /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) -/* - * Time of server startup - */ -extern struct timeval nfssvc_boot; - #ifdef CONFIG_NFSD_V4 -extern time_t nfsd4_lease; -extern time_t nfsd4_grace; - /* before processing a COMPOUND operation, we have to check that there * is enough space in the buffer for XDR encode to succeed. otherwise, * we might process an operation with side effects, and be unable to @@ -276,7 +289,7 @@ extern time_t nfsd4_grace; * reason. */ #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ -#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ +#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ @@ -321,6 +334,13 @@ extern time_t nfsd4_grace; #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 @@ -335,8 +355,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) static inline u32 nfsd_suppattrs2(u32 minorversion) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } } /* These will return ERR_INVAL if specified in GETATTR or READDIR. */ @@ -349,7 +372,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 @@ -364,12 +391,17 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) NFSD_WRITEABLE_ATTRS_WORD2 extern int nfsd4_is_junction(struct dentry *dentry); -#else +extern int register_cld_notifier(void); +extern void unregister_cld_notifier(void); +#else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +#define register_cld_notifier() 0 +#define unregister_cld_notifier() do { } while(0) + #endif /* CONFIG_NFSD_V4 */ #endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 68454e75fce..ec839341815 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, /* Check if the request originated from a secure port. */ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk(KERN_WARNING - "nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); return nfserr_perm; } @@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); } else { __u32 tfh[2]; dev_t xdev; @@ -253,8 +252,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", + dentry); } fhp->fh_dentry = dentry; @@ -361,10 +360,9 @@ skip_pseudoflavor_check: error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %s/%s permission failure, " + dprintk("fh_verify: %pd2 permission failure, " "acc=%x, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, + dentry, access, ntohl(error)); } out: @@ -386,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -514,14 +512,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; - __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, + dentry, (inode ? inode->i_ino : 0)); /* Choose filehandle version and fsid type based on @@ -534,13 +530,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", + dentry); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", fhp->fh_maxsize, - parent->d_name.name, dentry->d_name.name); + dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; @@ -559,20 +555,19 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fhp->fh_handle.fh_fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) { + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); return nfserr_opnotsupp; } @@ -600,22 +595,20 @@ fh_update(struct svc_fh *fhp) _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - goto out; + return 0; _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } -out: return 0; - out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); - goto out; + return nfserr_serverfault; out_negative: - printk(KERN_ERR "fh_update: %s/%s still negative!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; + printk(KERN_ERR "fh_update: %pd2 still negative!\n", + dentry); + return nfserr_serverfault; } /* @@ -635,8 +628,9 @@ fh_put(struct svc_fh *fhp) fhp->fh_post_saved = 0; #endif } + fh_drop_write(fhp); if (exp) { - cache_put(&exp->h, &svc_export_cache); + exp_put(exp); fhp->fh_export = NULL; } return; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index e5e6707ba68..2e89e70ac15 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -1,9 +1,58 @@ -/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include <linux/sunrpc/svc.h> +#include <uapi/linux/nfsd/nfsfh.h> + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} -#ifndef _LINUX_NFSD_FH_INT_H -#define _LINUX_NFSD_FH_INT_H +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} + +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ -#include <linux/nfsd/nfsfh.h> + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ + +} svc_fh; enum nfsd_fsid { FSID_DEV = 0, @@ -133,6 +182,17 @@ fh_init(struct svc_fh *fhp, int maxsize) #ifdef CONFIG_NFSD_V3 /* + * The wcc data stored in current_fh should be cleared + * between compound ops. + */ +static inline void +fh_clear_wcc(struct svc_fh *fhp) +{ + fhp->fh_post_saved = 0; + fhp->fh_pre_saved = 0; +} + +/* * Fill in the pre_op attr for the wcc data */ static inline void @@ -152,7 +212,8 @@ fill_pre_wcc(struct svc_fh *fhp) extern void fill_post_wcc(struct svc_fh *); #else -#define fill_pre_wcc(ignored) +#define fh_clear_wcc(ignored) +#define fill_pre_wcc(ignored) #define fill_post_wcc(notused) #endif /* CONFIG_NFSD_V3 */ @@ -173,8 +234,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) BUG_ON(!dentry); if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); return; } @@ -203,4 +264,4 @@ fh_unlock(struct svc_fh *fhp) } } -#endif /* _LINUX_NFSD_FH_INT_H */ +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e15dc45fc5e..54c6b3d3cc7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -26,17 +26,13 @@ static __be32 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } static __be32 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* * Get a file's attributes @@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, &resp->count); if (nfserr) return nfserr; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* @@ -196,6 +190,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct dentry *dchild; int type, mode; __be32 nfserr; + int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); dprintk("nfsd: CREATE %s %.*s\n", @@ -214,6 +209,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; + hosterr = fh_want_write(dirfhp); + if (hosterr) { + nfserr = nfserrno(hosterr); + goto done; + } + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { @@ -330,7 +331,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, out_unlock: /* We don't really need to unlock, as fh_put does it. */ fh_unlock(dirfhp); - + fh_drop_write(dirfhp); done: fh_put(dirfhp); return nfsd_return_dirop(nfserr, resp); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index eda7d7e55e0..1879e43f286 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -21,19 +21,19 @@ #include "nfsd.h" #include "cache.h" #include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_SVC extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); -struct timeval nfssvc_boot; /* - * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members * of the svc_serv struct. In particular, ->sv_nrthreads but also to some * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt * - * If (out side the lock) nfsd_serv is non-NULL, then it must point to a + * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number * of nfsd threads must exist and each must listed in ->sp_all_threads in each * entry of ->sv_pools[]. @@ -51,7 +51,6 @@ struct timeval nfssvc_boot; * nfsd_versions */ DEFINE_MUTEX(nfsd_mutex); -struct svc_serv *nfsd_serv; /* * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. @@ -60,8 +59,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_mem; -unsigned int nfsd_drc_mem_used; +unsigned long nfsd_drc_max_mem; +unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -117,7 +116,10 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion; +static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { + [0] = 1, + [1] = 1, +}; int nfsd_vers(int vers, enum vers_op change) { @@ -152,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) return -1; switch(change) { case NFSD_SET: - nfsd_supported_minorversion = minorversion; + nfsd_supported_minorversions[minorversion] = true; break; case NFSD_CLEAR: - if (minorversion == 0) - return -1; - nfsd_supported_minorversion = minorversion - 1; + nfsd_supported_minorversions[minorversion] = false; break; case NFSD_TEST: - return minorversion <= nfsd_supported_minorversion; + return nfsd_supported_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; } @@ -172,28 +172,32 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) */ #define NFSD_MAXSERVS 8192 -int nfsd_nrthreads(void) +int nfsd_nrthreads(struct net *net) { int rv = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv) - rv = nfsd_serv->sv_nrthreads; + if (nn->nfsd_serv) + rv = nn->nfsd_serv->sv_nrthreads; mutex_unlock(&nfsd_mutex); return rv; } -static int nfsd_init_socks(int port) +static int nfsd_init_socks(struct net *net) { int error; - if (!list_empty(&nfsd_serv->sv_permsocks)) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port, + error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; - error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port, + error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, SVC_SOCK_DEFAULTS); if (error < 0) return error; @@ -201,14 +205,15 @@ static int nfsd_init_socks(int port) return 0; } -static bool nfsd_up = false; +static int nfsd_users = 0; -static int nfsd_startup(unsigned short port, int nrservs) +static int nfsd_startup_generic(int nrservs) { int ret; - if (nfsd_up) + if (nfsd_users++) return 0; + /* * Readahead param cache - will no-op if it already exists. * (Note therefore results will be suboptimal if number of @@ -217,51 +222,105 @@ static int nfsd_startup(unsigned short port, int nrservs) ret = nfsd_racache_init(2*nrservs); if (ret) return ret; - ret = nfsd_init_socks(port); + ret = nfs4_state_start(); if (ret) goto out_racache; - ret = lockd_up(); + return 0; + +out_racache: + nfsd_racache_shutdown(); + return ret; +} + +static void nfsd_shutdown_generic(void) +{ + if (--nfsd_users) + return; + + nfs4_state_shutdown(); + nfsd_racache_shutdown(); +} + +static bool nfsd_needs_lockd(void) +{ +#if defined(CONFIG_NFSD_V3) + return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); +#else + return (nfsd_versions[2] != NULL); +#endif +} + +static int nfsd_startup_net(int nrservs, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + if (nn->nfsd_net_up) + return 0; + + ret = nfsd_startup_generic(nrservs); if (ret) - goto out_racache; - ret = nfs4_state_start(); + return ret; + ret = nfsd_init_socks(net); + if (ret) + goto out_socks; + + if (nfsd_needs_lockd() && !nn->lockd_up) { + ret = lockd_up(net); + if (ret) + goto out_socks; + nn->lockd_up = 1; + } + + ret = nfs4_state_start_net(net); if (ret) goto out_lockd; - nfsd_up = true; + + nn->nfsd_net_up = true; return 0; + out_lockd: - lockd_down(); -out_racache: - nfsd_racache_shutdown(); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; + } +out_socks: + nfsd_shutdown_generic(); return ret; } -static void nfsd_shutdown(void) +static void nfsd_shutdown_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_state_shutdown_net(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; + } + nn->nfsd_net_up = false; + nfsd_shutdown_generic(); +} + +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + /* * write_ports can create the server without actually starting * any threads--if we get shut down before any threads are * started, then nfsd_last_thread will be run before any of this * other initialization has been done. */ - if (!nfsd_up) + if (!nn->nfsd_net_up) return; - nfs4_state_shutdown(); - lockd_down(); - nfsd_racache_shutdown(); - nfsd_up = false; -} + nfsd_shutdown_net(net); -static void nfsd_last_thread(struct svc_serv *serv) -{ - /* When last nfsd thread exits we need to do some clean-up */ - nfsd_serv = NULL; - nfsd_shutdown(); - - svc_rpcb_cleanup(serv); + svc_rpcb_cleanup(serv, net); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); - nfsd_export_flush(); + nfsd_export_flush(net); } void nfsd_reset_versions(void) @@ -304,79 +363,108 @@ static void set_max_drc(void) >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); + dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } -int nfsd_create_serv(void) +static int nfsd_get_default_max_blksize(void) { - int err = 0; + struct sysinfo i; + unsigned long long target; + unsigned long ret; + + si_meminfo(&i); + target = (i.totalram - i.totalhigh) << PAGE_SHIFT; + /* + * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig + * machines, but only uses 32K on 128M machines. Bottom out at + * 8K on 32M and smaller. Of course, this is only a default. + */ + target >>= 12; + + ret = NFSSVC_MAXBLKSIZE; + while (ret > target && ret >= 8*1024*2) + ret /= 2; + return ret; +} + +int nfsd_create_serv(struct net *net) +{ + int error; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv) { - svc_get(nfsd_serv); + if (nn->nfsd_serv) { + svc_get(nn->nfsd_serv); return 0; } - if (nfsd_max_blksize == 0) { - /* choose a suitable default */ - struct sysinfo i; - si_meminfo(&i); - /* Aim for 1/4096 of memory per thread - * This gives 1MB on 4Gig machines - * But only uses 32K on 128M machines. - * Bottom out at 8K on 32M and smaller. - * Of course, this is only a default. - */ - nfsd_max_blksize = NFSSVC_MAXBLKSIZE; - i.totalram <<= PAGE_SHIFT - 12; - while (nfsd_max_blksize > i.totalram && - nfsd_max_blksize >= 8*1024*2) - nfsd_max_blksize /= 2; - } + if (nfsd_max_blksize == 0) + nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); - - nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); - if (nfsd_serv == NULL) + if (nn->nfsd_serv == NULL) return -ENOMEM; + error = svc_bind(nn->nfsd_serv, net); + if (error < 0) { + svc_destroy(nn->nfsd_serv); + return error; + } + set_max_drc(); - do_gettimeofday(&nfssvc_boot); /* record boot time */ - return err; + do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ + return 0; } -int nfsd_nrpools(void) +int nfsd_nrpools(struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; else - return nfsd_serv->sv_nrpools; + return nn->nfsd_serv->sv_nrpools; } -int nfsd_get_nrthreads(int n, int *nthreads) +int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (nfsd_serv != NULL) { - for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++) - nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads; + if (nn->nfsd_serv != NULL) { + for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++) + nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads; } return 0; } -int nfsd_set_nrthreads(int n, int *nthreads) +void nfsd_destroy(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int destroy = (nn->nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nn->nfsd_serv, net); + svc_destroy(nn->nfsd_serv); + if (destroy) + nn->nfsd_serv = NULL; +} + +int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; int tot = 0; int err = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); WARN_ON(!mutex_is_locked(&nfsd_mutex)); - if (nfsd_serv == NULL || n <= 0) + if (nn->nfsd_serv == NULL || n <= 0) return 0; - if (n > nfsd_serv->sv_nrpools) - n = nfsd_serv->sv_nrpools; + if (n > nn->nfsd_serv->sv_nrpools) + n = nn->nfsd_serv->sv_nrpools; /* enforce a global maximum number of threads */ tot = 0; @@ -406,15 +494,14 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - svc_get(nfsd_serv); + svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], + err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } - svc_destroy(nfsd_serv); - + nfsd_destroy(net); return err; } @@ -424,10 +511,11 @@ int nfsd_set_nrthreads(int n, int *nthreads) * this is the first time nrservs is nonzero. */ int -nfsd_svc(unsigned short port, int nrservs) +nfsd_svc(int nrservs, struct net *net) { int error; bool nfsd_up_before; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -436,31 +524,31 @@ nfsd_svc(unsigned short port, int nrservs) if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; error = 0; - if (nrservs == 0 && nfsd_serv == NULL) + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - error = nfsd_create_serv(); + error = nfsd_create_serv(net); if (error) goto out; - nfsd_up_before = nfsd_up; + nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup(port, nrservs); + error = nfsd_startup_net(nrservs, net); if (error) goto out_destroy; - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nfsd_serv which + /* We are holding a reference to nn->nfsd_serv which * we don't want to count in the return value, * so subtract 1 */ - error = nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads - 1; out_shutdown: if (error < 0 && !nfsd_up_before) - nfsd_shutdown(); + nfsd_shutdown_net(net); out_destroy: - svc_destroy(nfsd_serv); /* Release server */ + nfsd_destroy(net); /* Release server */ out: mutex_unlock(&nfsd_mutex); return error; @@ -474,7 +562,9 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; - int err, preverr = 0; + struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); + struct net *net = perm_sock->xpt_net; + int err; /* Lock module and set up kernel thread */ mutex_lock(&nfsd_mutex); @@ -501,12 +591,6 @@ nfsd(void *vrqstp) nfsdstats.th_cnt++; mutex_unlock(&nfsd_mutex); - /* - * We want less throttling in balance_dirty_pages() so that nfs to - * localhost doesn't cause nfsd to lock up due to all the client's - * dirty pages. - */ - current->flags |= PF_LESS_THROTTLE; set_freezable(); /* @@ -521,16 +605,6 @@ nfsd(void *vrqstp) ; if (err == -EINTR) break; - else if (err < 0) { - if (err != preverr) { - printk(KERN_WARNING "%s: unexpected error " - "from svc_recv (%d)\n", __func__, -err); - preverr = err; - } - schedule_timeout_uninterruptible(HZ); - continue; - } - validate_process_creds(); svc_process(rqstp); validate_process_creds(); @@ -543,9 +617,13 @@ nfsd(void *vrqstp) nfsdstats.th_cnt --; out: + rqstp->rq_server = NULL; + /* Release the thread */ svc_exit_thread(rqstp); + nfsd_destroy(net); + /* Release module */ mutex_unlock(&nfsd_mutex); module_put_and_exit(0); @@ -589,7 +667,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. */ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: @@ -633,21 +710,23 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. */ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); + mutex_lock(&nfsd_mutex); - if (nfsd_serv == NULL) { + if (nn->nfsd_serv == NULL) { mutex_unlock(&nfsd_mutex); return -ENODEV; } /* bump up the psudo refcount while traversing */ - svc_get(nfsd_serv); - ret = svc_pool_stats_open(nfsd_serv, file); + svc_get(nn->nfsd_serv); + ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); return ret; } @@ -655,9 +734,11 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); + struct net *net = inode->i_sb->s_fs_info; + mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ - svc_destroy(nfsd_serv); + nfsd_destroy(net); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 65ec595e222..1ac306b769d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -4,6 +4,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include "vfs.h" #include "xdr.h" #include "auth.h" @@ -100,12 +101,14 @@ decode_sattr(__be32 *p, struct iattr *iap) iap->ia_mode = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + iap->ia_uid = make_kuid(&init_user_ns, tmp); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + iap->ia_gid = make_kgid(&init_user_ns, tmp); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; @@ -151,8 +154,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); @@ -194,11 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, } /* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { - struct kstat stat; - vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat); - return encode_fattr(rqstp, p, fhp, &stat); + return encode_fattr(rqstp, p, fhp, stat); } /* @@ -213,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -246,8 +248,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readargs *args) { unsigned int len; - int v,pn; - if (!(p = decode_fh(p, &args->fh))) + int v; + p = decode_fh(p, &args->fh); + if (!p) return 0; args->offset = ntohl(*p++); @@ -262,8 +265,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -279,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, hdr, dlen; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p++; /* beginoffset */ @@ -353,9 +358,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, int nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -389,14 +395,15 @@ int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffb5df1db94..374c66283ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -37,7 +37,6 @@ #include <linux/idr.h> #include <linux/sunrpc/svc_xprt.h> -#include <linux/nfsd/nfsfh.h> #include "nfsfh.h" typedef struct { @@ -79,6 +78,8 @@ struct nfs4_stid { #define NFS4_DELEG_STID 4 /* For an open stateid kept around *only* to process close replays: */ #define NFS4_CLOSED_STID 8 +/* For a deleg stateid kept around only to process free_stateid's: */ +#define NFS4_REVOKED_DELEG_STID 16 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; @@ -121,19 +122,21 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 /* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 1024 +#define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) struct nfsd4_slot { - bool sl_inuse; - bool sl_cachethis; - u16 sl_opcnt; u32 sl_seqid; __be32 sl_status; u32 sl_datalen; + u16 sl_opcnt; +#define NFSD4_SLOT_INUSE (1 << 0) +#define NFSD4_SLOT_CACHETHIS (1 << 1) +#define NFSD4_SLOT_INITIALIZED (1 << 2) + u8 sl_flags; char sl_data[]; }; @@ -148,6 +151,12 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_cb_sec { + u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ + kuid_t uid; + kgid_t gid; +}; + struct nfsd4_create_session { clientid_t clientid; struct nfs4_sessionid sessionid; @@ -156,8 +165,12 @@ struct nfsd4_create_session { struct nfsd4_channel_attrs fore_channel; struct nfsd4_channel_attrs back_channel; u32 callback_prog; - u32 uid; - u32 gid; + struct nfsd4_cb_sec cb_sec; +}; + +struct nfsd4_backchannel_ctl { + u32 bc_cb_program; + struct nfsd4_cb_sec bc_cb_sec; }; struct nfsd4_bind_conn_to_session { @@ -182,32 +195,24 @@ struct nfsd4_conn { }; struct nfsd4_session { - struct kref se_ref; + atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; +/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ +#define NFS4_SESSION_DEAD 0x010 u32 se_flags; struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; struct nfsd4_channel_attrs se_bchannel; + struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; u32 se_cb_prog; u32 se_cb_seq_nr; struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -static inline void -nfsd4_put_session(struct nfsd4_session *ses) -{ - extern void free_session(struct kref *kref); - kref_put(&ses->se_ref, free_session); -} - -static inline void -nfsd4_get_session(struct nfsd4_session *ses) -{ - kref_get(&ses->se_ref); -} +extern void nfsd4_put_session(struct nfsd4_session *ses); /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { @@ -230,29 +235,33 @@ struct nfsd4_sessionid { */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ - struct list_head cl_strhash; /* hash by cl_name */ + struct rb_node cl_namenode; /* link into by-name trees */ struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; + struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ - char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ - u32 cl_flavor; /* setclientid pseudoflavor */ - char *cl_principal; /* setclientid principal name */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ - u32 cl_firststate; /* recovery dir creation */ u32 cl_minorversion; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; -#define NFSD4_CLIENT_CB_UPDATE 1 -#define NFSD4_CLIENT_KILL 2 - unsigned long cl_cb_flags; +#define NFSD4_CLIENT_CB_UPDATE (0) +#define NFSD4_CLIENT_CB_KILL (1) +#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ +#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ +#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ +#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ + 1 << NFSD4_CLIENT_CB_KILL) + unsigned long cl_flags; + struct rpc_cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; #define NFSD4_CB_UP 0 @@ -279,20 +288,9 @@ struct nfs4_client { unsigned long cl_cb_slot_busy; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ + struct net *net; }; -static inline void -mark_client_expired(struct nfs4_client *clp) -{ - clp->cl_time = 0; -} - -static inline bool -is_client_expired(struct nfs4_client *clp) -{ - return clp->cl_time == 0; -} - /* struct nfs4_client_reset * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl * upon lease reset, or from upcall to state_daemon (to read in state @@ -300,6 +298,7 @@ is_client_expired(struct nfs4_client *clp) */ struct nfs4_client_reclaim { struct list_head cr_strhash; /* hash by cr_name */ + struct nfs4_client *cr_clp; /* pointer to associated clp */ char cr_recdir[HEXDIR_LEN]; /* recover dir */ }; @@ -359,7 +358,6 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_PURGE_CLOSE 2 #define NFS4_OO_NEW 4 unsigned char oo_flags; }; @@ -367,7 +365,7 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; /* for lockowners only */ + struct list_head lo_perstateid; struct list_head lo_list; /* for temporary uses */ }; @@ -381,14 +379,10 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) return container_of(so, struct nfs4_lockowner, lo_owner); } -/* -* nfs4_file: a file opened by some number of (open) nfs4_stateowners. -* o fi_perfile list is used to search for conflicting -* share_acces, share_deny on the file. -*/ +/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ struct nfs4_file { atomic_t fi_ref; - struct list_head fi_hash; /* hash by "struct inode *" */ + struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ @@ -457,36 +451,61 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) #define WR_STATE 0x00000020 struct nfsd4_compound_state; +struct nfsd_net; -extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, +extern __be32 nfs4_preprocess_stateid_op(struct net *net, + struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); -extern int nfs4_in_grace(void); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid); -extern void nfs4_free_openowner(struct nfs4_openowner *); -extern void nfs4_free_lockowner(struct nfs4_lockowner *); +void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); +extern void nfs4_release_reclaim(struct nfsd_net *); +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, + struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); extern int set_callback_cred(void); +extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -extern void nfsd4_do_callback_rpc(struct work_struct *); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); -extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern void nfsd4_init_recdir(void); -extern int nfsd4_recdir_load(void); -extern void nfsd4_shutdown_recdir(void); -extern int nfs4_client_to_reclaim(const char *name); -extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); -extern void nfsd4_recdir_purge_old(void); -extern void nfsd4_create_clid_dir(struct nfs4_client *clp); -extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); -extern void release_session_client(struct nfsd4_session *); -extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); -extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, + struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); +extern void put_client_renew(struct nfs4_client *clp); + +/* nfs4recover operations */ +extern int nfsd4_client_tracking_init(struct net *net); +extern void nfsd4_client_tracking_exit(struct net *net); +extern void nfsd4_client_record_create(struct nfs4_client *clp); +extern void nfsd4_client_record_remove(struct nfs4_client *clp); +extern int nfsd4_client_record_check(struct nfs4_client *clp); +extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); + +/* nfs fault injection functions */ +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); + +u64 nfsd_forget_client(struct nfs4_client *, u64); +u64 nfsd_forget_client_locks(struct nfs4_client*, u64); +u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); +u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); +u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_print_client(struct nfs4_client *, u64); +u64 nfsd_print_client_locks(struct nfs4_client *, u64); +u64 nfsd_print_client_openowners(struct nfs4_client *, u64); +u64 nfsd_print_client_delegations(struct nfs4_client *, u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index a2e2402b2af..cd90878a76a 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -24,7 +24,7 @@ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/sunrpc/stats.h> -#include <linux/nfsd/stats.h> +#include <net/net_namespace.h> #include "nfsd.h" @@ -94,11 +94,11 @@ static const struct file_operations nfsd_proc_fops = { void nfsd_stat_init(void) { - svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); + svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); } void nfsd_stat_shutdown(void) { - svc_proc_unregister("nfsd"); + svc_proc_unregister(&init_net, "nfsd"); } diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 00000000000..a5c944b771c --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include <uapi/linux/nfsd/stats.h> + + +struct nfsd_stats { + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats nfsdstats; +extern struct svc_stat nfsd_svcstats; + +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index edf6d3ed877..140c496f612 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -28,6 +28,7 @@ #include <asm/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> +#include <linux/security.h> #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -206,7 +207,12 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - fh_lock(fhp); + /* + * In the nfsd4_open() case, this may be held across + * subsequent open and delegation acquisition which may + * need to take the child's i_mutex: + */ + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = lookup_one_len(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -272,13 +278,6 @@ out: return err; } -static int nfsd_break_lease(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - return break_lease(inode, O_WRONLY | O_NONBLOCK); -} - /* * Commit metadata changes to stable storage. */ @@ -297,41 +296,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -341,8 +311,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -368,30 +337,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -401,8 +346,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Revoke setuid/setgid on chown */ if (!S_ISDIR(inode->i_mode) && - (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || - ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { + ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) { iap->ia_valid |= ATTR_KILL_PRIV; if (iap->ia_valid & ATTR_MODE) { /* we're setting mode too, just clear the s*id bits */ @@ -414,186 +358,118 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } - - /* Change the attributes. */ - - iap->ia_valid |= ATTR_CTIME; - - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); - - host_err = notify_change(dentry, iap); - err = nfserrno(host_err); - fh_unlock(fhp); - } - if (size_change) - put_write_access(inode); - if (!err) - commit_metadata(fhp); -out: - return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } -#if defined(CONFIG_NFSD_V2_ACL) || \ - defined(CONFIG_NFSD_V3_ACL) || \ - defined(CONFIG_NFSD_V4) -static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) { - ssize_t buflen; - ssize_t ret; - - buflen = vfs_getxattr(dentry, key, NULL, 0); - if (buflen <= 0) - return buflen; + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - *buf = kmalloc(buflen, GFP_KERNEL); - if (!*buf) - return -ENOMEM; + if (iap->ia_size < inode->i_size) { + __be32 err; - ret = vfs_getxattr(dentry, key, *buf, buflen); - if (ret < 0) - kfree(*buf); - return ret; -} -#endif + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } -#if defined(CONFIG_NFSD_V4) -static int -set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) -{ - int len; - size_t buflen; - char *buf = NULL; - int error = 0; - - buflen = posix_acl_xattr_size(pacl->a_count); - buf = kmalloc(buflen, GFP_KERNEL); - error = -ENOMEM; - if (buf == NULL) - goto out; + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; - len = posix_acl_to_xattr(pacl, buf, buflen); - if (len < 0) { - error = len; - goto out; - } + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; - error = vfs_setxattr(dentry, key, buf, len, 0); -out: - kfree(buf); - return error; +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); } +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ __be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) { - __be32 error; - int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + bool get_write_count; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; /* Get inode */ - error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + if (get_write_count) { + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + } dentry = fhp->fh_dentry; inode = dentry->d_inode; - if (S_ISDIR(inode->i_mode)) - flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); - if (host_error == -EINVAL) { - return nfserr_attrnotsupp; - } else if (host_error < 0) - goto out_nfserr; + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; - host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); - if (host_error < 0) - goto out_release; + if (!iap->ia_valid) + goto out; - if (S_ISDIR(inode->i_mode)) - host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); + nfsd_sanitize_attrs(inode, iap); -out_release: - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; - else - return nfserrno(host_error); -} + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; + } -static struct posix_acl * -_get_posix_acl(struct dentry *dentry, char *key) -{ - void *buf = NULL; - struct posix_acl *pacl = NULL; - int buflen; - - buflen = nfsd_getxattr(dentry, key, &buf); - if (!buflen) - buflen = -ENODATA; - if (buflen <= 0) - return ERR_PTR(buflen); - - pacl = posix_acl_from_xattr(buf, buflen); - kfree(buf); - return pacl; -} + iap->ia_valid |= ATTR_CTIME; -int -nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) -{ - struct inode *inode = dentry->d_inode; - int error = 0; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; - - pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); - if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) - pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) { - error = PTR_ERR(pacl); - pacl = NULL; - goto out; + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; } - if (S_ISDIR(inode->i_mode)) { - dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); - if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) - dpacl = NULL; - else if (IS_ERR(dpacl)) { - error = PTR_ERR(dpacl); - dpacl = NULL; - goto out; - } - flags = NFS4_ACL_DIR; - } + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + err = nfserrno(host_err); - *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); - if (IS_ERR(*acl)) { - error = PTR_ERR(*acl); - *acl = NULL; - } - out: - posix_acl_release(pacl); - posix_acl_release(dpacl); - return error; +out_put_write_access: + if (size_change) + put_write_access(inode); + if (!err) + commit_metadata(fhp); +out: + return err; } +#if defined(CONFIG_NFSD_V4) /* * NFS junction information is stored in an extended attribute. */ @@ -621,6 +497,33 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + __be32 error; + int host_error; + struct dentry *dentry; + + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 @@ -737,14 +640,15 @@ static int nfsd_open_break_lease(struct inode *inode, int access) /* * Open an existing file or directory. - * The access argument indicates the type of open (read/write/lock) + * The may_flags argument indicates the type of open (read/write/lock) + * and additional flags. * N.B. After this call fhp needs an fh_put */ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int access, struct file **filp) + int may_flags, struct file **filp) { - struct dentry *dentry; + struct path path; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; __be32 err; @@ -756,19 +660,28 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. + * + * Arguably we should also allow the owner override for + * directories, but we never have and it doesn't seem to have + * caused anyone a problem. If we were to change this, note + * also that our filldir callbacks would need a variant of + * lookup_one_len that doesn't check permissions. */ - err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); + if (type == S_IFREG) + may_flags |= NFSD_MAY_OWNER_OVERRIDE; + err = fh_verify(rqstp, fhp, type, may_flags); if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; /* Disallow write access to files with the append-only bit set * or any access when mandatory locking enabled */ err = nfserr_perm; - if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) + if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; /* * We must ignore files (but only files) which might have mandatory @@ -781,22 +694,29 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, if (!inode->i_fop) goto out; - host_err = nfsd_open_break_lease(inode, access); + host_err = nfsd_open_break_lease(inode, may_flags); if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; - if (access & NFSD_MAY_WRITE) { - if (access & NFSD_MAY_READ) + if (may_flags & NFSD_MAY_WRITE) { + if (may_flags & NFSD_MAY_READ) flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; } - *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, current_cred()); - if (IS_ERR(*filp)) + *filp = dentry_open(&path, flags, current_cred()); + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else - host_err = ima_file_check(*filp, access); + *filp = NULL; + } else { + host_err = ima_file_check(*filp, may_flags); + + if (may_flags & NFSD_MAY_64BIT_COOKIE) + (*filp)->f_mode |= FMODE_64BITHASH; + else + (*filp)->f_mode |= FMODE_32BITHASH; + } + out_nfserr: err = nfserrno(host_err); out: @@ -870,7 +790,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page **pp = rqstp->rq_next_page; struct page *page = buf->page; size_t size; @@ -878,17 +798,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (rqstp->rq_res.page_len == 0) { get_page(page); - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); - if (*pp) - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + if (*rqstp->rq_next_page) + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_len += size; } else rqstp->rq_res.page_len += size; @@ -902,51 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { - mm_segment_t oldfs; - __be32 err; - int host_err; - - err = nfserr_perm; - - if (file->f_op->splice_read && rqstp->rq_splice_ok) { - struct splice_desc sd = { - .len = 0, - .total_len = *count, - .pos = offset, - .u.data = rqstp, - }; - - rqstp->rq_resused = 1; - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - } else { - oldfs = get_fs(); - set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); - set_fs(oldfs); - } - if (host_err >= 0) { nfsdstats.io_read += host_err; *count = host_err; - err = 0; fsnotify_access(file); + return 0; } else - err = nfserrno(host_err); - return err; + return nfserrno(host_err); } -static void kill_suid(struct dentry *dentry) +int nfsd_splice_read(struct svc_rqst *rqstp, + struct file *file, loff_t offset, unsigned long *count) { - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + int host_err; - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &ia); - mutex_unlock(&dentry->d_inode->i_mutex); + rqstp->rq_next_page = rqstp->rq_respages + 1; + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + return nfsd_finish_read(file, count, host_err); +} + +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + mm_segment_t oldfs; + int host_err; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + return nfsd_finish_read(file, count, host_err); +} + +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + if (file->f_op->splice_read && rqstp->rq_splice_ok) + return nfsd_splice_read(rqstp, file, offset, count); + else + return nfsd_readv(file, offset, vec, vlen, count); } /* @@ -965,7 +886,7 @@ static void kill_suid(struct dentry *dentry) */ static int wait_for_concurrent_writes(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); static ino_t last_ino; static dev_t last_dev; int err = 0; @@ -999,37 +920,30 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int host_err; int stable = *stablep; int use_wgather; + loff_t pos = offset; + unsigned int pflags = current->flags; + + if (rqstp->rq_local) + /* + * We want less throttling in balance_dirty_pages() + * and shrink_inactive_list() so that nfs to + * localhost doesn't cause nfsd to lock up due to all + * the client's dirty pages or its congested queue. + */ + current->flags |= PF_LESS_THROTTLE; dentry = file->f_path.dentry; inode = dentry->d_inode; exp = fhp->fh_export; - /* - * Request sync writes if - * - the sync export option has been set, or - * - the client requested O_SYNC behavior (NFSv3 feature). - * - The file system doesn't support fsync(). - * When NFSv2 gathered writes have been configured for this volume, - * flushing the data to disk is handled separately below. - */ use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); - if (!file->f_op->fsync) {/* COMMIT3 cannot work */ - stable = 2; - *stablep = 2; /* FILE_SYNC */ - } - if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !use_wgather) { - spin_lock(&file->f_lock); - file->f_flags |= O_SYNC; - spin_unlock(&file->f_lock); - } /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); set_fs(oldfs); if (host_err < 0) goto out_nfserr; @@ -1037,12 +951,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, nfsdstats.io_write += host_err; fsnotify_modify(file); - /* clear setuid/setgid flag after write */ - if (inode->i_mode & (S_ISUID | S_ISGID)) - kill_suid(dentry); - - if (stable && use_wgather) - host_err = wait_for_concurrent_writes(file); + if (stable) { + if (use_wgather) + host_err = wait_for_concurrent_writes(file); + else + host_err = vfs_fsync_range(file, offset, offset+*cnt, 0); + } out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); @@ -1050,36 +964,33 @@ out_nfserr: err = 0; else err = nfserrno(host_err); + if (rqstp->rq_local) + tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); return err; } -/* - * Read data from a file. count must contain the requested read count - * on entry. On return, *count contains the number of bytes actually read. - * N.B. After this call fhp needs an fh_put - */ -__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file **file, struct raparms **ra) { - struct file *file; struct inode *inode; - struct raparms *ra; __be32 err; - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); if (err) return err; - inode = file->f_path.dentry->d_inode; + inode = file_inode(*file); /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - - if (ra && ra->p_set) - file->f_ra = ra->p_ra; + *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + if (*ra && (*ra)->p_set) + (*file)->f_ra = (*ra)->p_ra; + return nfs_ok; +} +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{ /* Write back readahead params */ if (ra) { struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; @@ -1089,28 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, ra->p_count--; spin_unlock(&rab->pb_lock); } - nfsd_close(file); - return err; } -/* As above, but use the provided file descriptor. */ -__be32 -nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - __be32 err; + struct file *file; + struct raparms *ra; + __be32 err; + + err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + if (err) + return err; + + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + + nfsd_put_tmp_read_open(file, ra); - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - } else /* Note file may still be NULL in NFSv4 special stateid case: */ - err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); -out: return err; } @@ -1205,7 +1117,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, * send along the gid on create when it tries to implement * setgid directories via NFS: */ - if (current_fsuid() != 0) + if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); @@ -1268,6 +1180,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * If it has, the parent directory should already be locked. */ if (!resfhp->fh_dentry) { + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); @@ -1283,9 +1199,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %s/%s not locked!\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + "nfsd_create: parent %pd2 not locked!\n", + dentry); err = nfserr_io; goto out; } @@ -1295,8 +1210,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %s/%s not negative!\n", - dentry->d_name.name, dchild->d_name.name); + dprintk("nfsd_create: dentry %pd/%pd not negative!\n", + dentry, dchild); goto out; } @@ -1311,17 +1226,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - /* * Get the dir op function pointer. */ err = 0; + host_err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; @@ -1335,10 +1247,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; } - if (host_err < 0) { - fh_drop_write(fhp); + if (host_err < 0) goto out_nfserr; - } err = nfsd_create_setattr(rqstp, resfhp, iap); @@ -1350,7 +1260,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; - fh_drop_write(fhp); /* * Update the file handle to get the new inode info. */ @@ -1409,6 +1318,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_notdir; if (!dirp->i_op->lookup) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); /* @@ -1441,16 +1355,13 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, v_atime = verifier[1]&0x7fffffff; } - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; if (dchild->d_inode) { err = 0; switch (createmode) { case NFS3_CREATE_UNCHECKED: if (! S_ISREG(dchild->d_inode->i_mode)) - err = nfserr_exist; + goto out; else if (truncp) { /* in nfsv4, we need to treat this case a little * differently. we don't want to truncate the @@ -1469,13 +1380,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } case NFS4_CREATE_EXCLUSIVE4_1: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; @@ -1484,7 +1401,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (host_err < 0) { fh_drop_write(fhp); goto out_nfserr; @@ -1514,7 +1431,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!err) err = nfserrno(commit_metadata(fhp)); - fh_drop_write(fhp); /* * Update the filehandle to get the new inode info. */ @@ -1525,6 +1441,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); if (dchild && !IS_ERR(dchild)) dput(dchild); + fh_drop_write(fhp); return err; out_nfserr: @@ -1541,30 +1458,31 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { - struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; + struct path path; err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; err = nfserr_inval; if (!inode->i_op->readlink) goto out; - touch_atime(fhp->fh_export->ex_path.mnt, dentry); + touch_atime(&path); /* N.B. Why does this call need a get_fs()?? * Remove the set_fs and watch the fireworks:-) --okir */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = inode->i_op->readlink(dentry, buf, *lenp); + host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp); set_fs(oldfs); if (host_err < 0) @@ -1604,6 +1522,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); @@ -1611,10 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) @@ -1674,6 +1593,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + host_err = fh_want_write(tfhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; @@ -1685,20 +1610,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dold = tfhp->fh_dentry; - host_err = fh_want_write(tfhp); - if (host_err) { - err = nfserrno(host_err); - goto out_dput; - } err = nfserr_noent; if (!dold->d_inode) - goto out_drop_write; - host_err = nfsd_break_lease(dold->d_inode); - if (host_err) { - err = nfserrno(host_err); - goto out_drop_write; - } - host_err = vfs_link(dold, dirp, dnew); + goto out_dput; + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1709,12 +1624,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_drop_write: - fh_drop_write(tfhp); out_dput: dput(dnew); out_unlock: fh_unlock(ffhp); + fh_drop_write(tfhp); out: return err; @@ -1749,14 +1663,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, tdentry = tfhp->fh_dentry; tdir = tdentry->d_inode; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - if (ffhp->fh_export != tfhp->fh_export) - goto out; - err = nfserr_perm; if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + host_err = fh_want_write(ffhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); @@ -1787,41 +1703,31 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, host_err = -EXDEV; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out_dput_new; - host_err = fh_want_write(ffhp); - if (host_err) + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - host_err = nfsd_break_lease(odentry->d_inode); - if (host_err) - goto out_drop_write; - if (ndentry->d_inode) { - host_err = nfsd_break_lease(ndentry->d_inode); - if (host_err) - goto out_drop_write; - } - host_err = vfs_rename(fdir, odentry, tdir, ndentry); + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) host_err = commit_metadata(ffhp); } -out_drop_write: - fh_drop_write(ffhp); out_dput_new: dput(ndentry); out_dput_old: dput(odentry); out_nfserr: err = nfserrno(host_err); - - /* we cannot reply on fh_unlock on the two filehandles, + /* + * We cannot rely on fh_unlock on the two filehandles, * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand + * were the same, so again we do it by hand. */ fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = 0; + fh_drop_write(ffhp); out: return err; @@ -1847,6 +1753,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (err) goto out; + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = dentry->d_inode; @@ -1865,22 +1775,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; - host_err = fh_want_write(fhp); - if (host_err) - goto out_put; - - host_err = nfsd_break_lease(rdentry->d_inode); - if (host_err) - goto out_drop_write; if (type != S_IFDIR) - host_err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) host_err = commit_metadata(fhp); -out_drop_write: - fh_drop_write(fhp); -out_put: dput(rdentry); out_nfserr: @@ -1905,6 +1805,7 @@ struct buffered_dirent { }; struct readdir_data { + struct dir_context ctx; char *dirent; size_t used; int full; @@ -1936,27 +1837,29 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { - struct readdir_data buf; struct buffered_dirent *de; int host_err; int size; loff_t offset; + struct readdir_data buf = { + .ctx.actor = nfsd_buffered_filldir, + .dirent = (void *)__get_free_page(GFP_KERNEL) + }; - buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); offset = *offsetp; while (1) { - struct inode *dir_inode = file->f_path.dentry->d_inode; + struct inode *dir_inode = file_inode(file); unsigned int reclen; cdp->err = nfserr_eof; /* will be cleared on successful read */ buf.used = 0; buf.full = 0; - host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + host_err = iterate_dir(file, &buf.ctx); if (buf.full) host_err = 0; @@ -2020,12 +1923,17 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, __be32 err; struct file *file; loff_t offset = *offsetp; + int may_flags = NFSD_MAY_READ; - err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); + /* NFSv2 only supports 32 bit cookies */ + if (rqstp->rq_vers > 2) + may_flags |= NFSD_MAY_64BIT_COOKIE; + + err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; - offset = vfs_llseek(file, offset, 0); + offset = vfs_llseek(file, offset, SEEK_SET); if (offset < 0) { err = nfserrno((int)offset); goto out_close; @@ -2137,7 +2045,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, * with NFSv3. */ if ((acc & NFSD_MAY_OWNER_OVERRIDE) && - inode->i_uid == current_fsuid()) + uid_eq(inode->i_uid, current_fsuid())) return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ @@ -2212,93 +2120,3 @@ out_nomem: nfsd_racache_shutdown(); return -ENOMEM; } - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl * -nfsd_get_posix_acl(struct svc_fh *fhp, int type) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - ssize_t size; - struct posix_acl *acl; - - if (!IS_POSIXACL(inode)) - return ERR_PTR(-EOPNOTSUPP); - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return ERR_PTR(-EOPNOTSUPP); - } - - size = nfsd_getxattr(fhp->fh_dentry, name, &value); - if (size < 0) - return ERR_PTR(size); - - acl = posix_acl_from_xattr(value, size); - kfree(value); - return acl; -} - -int -nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - size_t size; - int error; - - if (!IS_POSIXACL(inode) || - !inode->i_op->setxattr || !inode->i_op->removexattr) - return -EOPNOTSUPP; - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return -EOPNOTSUPP; - } - - if (acl && acl->a_count) { - size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(size, GFP_KERNEL); - if (!value) - return -ENOMEM; - error = posix_acl_to_xattr(acl, value, size); - if (error < 0) - goto getout; - size = error; - } else - size = 0; - - error = fh_want_write(fhp); - if (error) - goto getout; - if (size) - error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); - else { - if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) - error = 0; - else { - error = vfs_removexattr(fhp->fh_dentry, name); - if (error == -ENODATA) - error = 0; - } - } - fh_drop_write(fhp); - -getout: - kfree(value); - return error; -} -#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 1dcd238e11a..91b6ae3f658 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,7 @@ #define LINUX_NFSD_VFS_H #include "nfsfh.h" +#include "nfsd.h" /* * Flags for nfsd_permission @@ -27,6 +28,8 @@ #define NFSD_MAY_BYPASS_GSS 0x400 #define NFSD_MAY_READ_IF_EXEC 0x800 +#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ + #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -36,7 +39,6 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); /* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -50,9 +52,8 @@ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, - struct nfs4_acl *); -int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -69,10 +70,16 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); void nfsd_close(struct file *); +struct raparms; +__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, + struct file **, struct raparms **); +void nfsd_put_tmp_read_open(struct file *, struct raparms *); +int nfsd_splice_read(struct svc_rqst *, + struct file *, loff_t, unsigned long *); +int nfsd_readv(struct file *, loff_t, struct kvec *, int, + unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, - loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long *, int *); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, @@ -85,35 +92,38 @@ __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, __be32 nfsd_rename(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *, char *, int); -__be32 nfsd_remove(struct svc_rqst *, - struct svc_fh *, char *, int); __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); -int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); -#endif static inline int fh_want_write(struct svc_fh *fh) { - return mnt_want_write(fh->fh_export->ex_path.mnt); + int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + + if (!ret) + fh->fh_want_write = 1; + return ret; } static inline void fh_drop_write(struct svc_fh *fh) { - mnt_drop_write(fh->fh_export->ex_path.mnt); + if (fh->fh_want_write) { + fh->fh_want_write = 0; + mnt_drop_write(fh->fh_export->ex_path.mnt); + } +} + +static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +{ + struct path p = {.mnt = fh->fh_export->ex_path.mnt, + .dentry = fh->fh_dentry}; + return nfserrno(vfs_getattr(&p, stat)); } #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 53b1863dd8f..4f0481d6380 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -167,7 +167,7 @@ int nfssvc_encode_entry(void *, const char *name, int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); /* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); #endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 7df980eb056..335e04aaf7d 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -136,6 +136,7 @@ struct nfsd3_accessres { __be32 status; struct svc_fh fh; __u32 access; + struct kstat stat; }; struct nfsd3_readlinkres { @@ -173,6 +174,9 @@ struct nfsd3_linkres { struct nfsd3_readdirres { __be32 status; struct svc_fh fh; + /* Just to save kmalloc on every readdirplus entry (svc_fh is a + * little large for the stack): */ + struct svc_fh scratch; int count; __be32 verf[2]; @@ -225,6 +229,7 @@ struct nfsd3_getaclres { int mask; struct posix_acl *acl_access; struct posix_acl *acl_default; + struct kstat stat; }; /* dummy type for release */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2364747ee97..18cbb6d9c8a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -40,9 +40,17 @@ #include "state.h" #include "nfsd.h" +#define NFSD4_MAX_SEC_LABEL_LEN 2048 #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) +#define CURRENT_STATE_ID_FLAG (1<<0) +#define SAVED_STATE_ID_FLAG (1<<1) + +#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) +#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) +#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) + struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; @@ -50,10 +58,14 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; - __be32 *datap; + int data_offset; size_t iovlen; u32 minorversion; - u32 status; + __be32 status; + stateid_t current_stateid; + stateid_t save_stateid; + /* to indicate current and saved state id presents */ + u32 sid_flags; }; static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) @@ -107,6 +119,7 @@ struct nfsd4_create { struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -173,7 +186,6 @@ struct nfsd4_lock { #define lk_old_lock_stateid v.old.lock_stateid #define lk_old_lock_seqid v.old.lock_seqid -#define lk_rflags u.ok.rflags #define lk_resp_stateid u.ok.stateid #define lk_denied u.denied @@ -212,17 +224,21 @@ struct nfsd4_open { struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ stateid_t op_delegate_stateid; /* request - response */ + u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ u32 op_create; /* request */ u32 op_createmode; /* request */ u32 op_bmval[3]; /* request */ - struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ - nfs4_verifier verf; /* EXCLUSIVE4 */ + struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ + nfs4_verifier op_verf __attribute__((aligned(32))); + /* EXCLUSIVE4 */ clientid_t op_clientid; /* request */ struct xdr_netobj op_owner; /* request */ u32 op_seqid; /* request */ u32 op_share_access; /* request */ u32 op_share_deny; /* request */ + u32 op_deleg_want; /* request */ stateid_t op_stateid; /* response */ + __be32 op_xdr_error; /* see nfsd4_open_omfg() */ u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ u32 op_rflags; /* response */ @@ -232,9 +248,8 @@ struct nfsd4_open { struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; + struct xdr_netobj op_label; }; -#define op_iattr iattr -#define op_verf verf struct nfsd4_open_confirm { stateid_t oc_req_stateid /* request */; @@ -245,8 +260,9 @@ struct nfsd4_open_confirm { struct nfsd4_open_downgrade { stateid_t od_stateid; u32 od_seqid; - u32 od_share_access; - u32 od_share_deny; + u32 od_share_access; /* request */ + u32 od_deleg_want; /* request */ + u32 od_share_deny; /* request */ }; @@ -271,9 +287,8 @@ struct nfsd4_readdir { struct svc_fh * rd_fhp; /* response */ struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + struct xdr_stream *xdr; + int cookie_offset; }; struct nfsd4_release_lockowner { @@ -316,6 +331,7 @@ struct nfsd4_setattr { u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; }; struct nfsd4_setclientid { @@ -343,15 +359,19 @@ struct nfsd4_saved_compoundargs { struct page **pagelist; }; +struct nfsd4_test_stateid_id { + __be32 ts_id_status; + stateid_t ts_id_stateid; + struct list_head ts_id_list; +}; + struct nfsd4_test_stateid { - __be32 ts_num_ids; - struct nfsd4_compoundargs *ts_saved_args; - struct nfsd4_saved_compoundargs ts_savedp; + u32 ts_num_ids; + struct list_head ts_stateid_list; }; struct nfsd4_free_stateid { stateid_t fr_stateid; /* request */ - __be32 fr_status; /* response */ }; /* also used for NVERIFY */ @@ -366,7 +386,8 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - int wr_vlen; + struct kvec wr_head; + struct page ** wr_pagelist; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -443,6 +464,7 @@ struct nfsd4_op { /* NFSv4.1 */ struct nfsd4_exchange_id exchange_id; + struct nfsd4_backchannel_ctl backchannel_ctl; struct nfsd4_bind_conn_to_session bind_conn_to_session; struct nfsd4_create_session create_session; struct nfsd4_destroy_session destroy_session; @@ -483,9 +505,7 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - __be32 * p; - __be32 * end; - struct xdr_buf * xbuf; + struct xdr_stream xdr; struct svc_rqst * rqstp; u32 taglen; @@ -503,9 +523,21 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) { - return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); + return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + || nfsd4_is_solo_sequence(resp); } +static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); +void warn_on_nonidempotent_op(struct nfsd4_op *op); + #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) static inline void @@ -529,12 +561,13 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, struct nfsd4_compoundargs *); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); -int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); -void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, - u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid *setclid); @@ -542,10 +575,9 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); -extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, - struct nfsd4_sequence *seq); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); extern __be32 nfsd4_create_session(struct svc_rqst *, struct nfsd4_compound_state *, @@ -559,7 +591,7 @@ extern __be32 nfsd4_destroy_session(struct svc_rqst *, extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, - struct nfsd4_open *open); + struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); @@ -592,6 +624,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); +extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); #endif /* diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h new file mode 100644 index 00000000000..c5c55dfb91a --- /dev/null +++ b/fs/nfsd/xdr4cb.h @@ -0,0 +1,23 @@ +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) |
