diff options
Diffstat (limited to 'fs/nfsd')
41 files changed, 17328 insertions, 7206 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig new file mode 100644 index 00000000000..f994e750e0d --- /dev/null +++ b/fs/nfsd/Kconfig @@ -0,0 +1,108 @@ +config NFSD + tristate "NFS server support" + depends on INET + depends on FILE_LOCKING + select LOCKD + select SUNRPC + select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V2_ACL + help + Choose Y here if you want to allow other computers to access + files residing on this system using Sun's Network File System + protocol. To compile the NFS server support as a module, + choose M here: the module will be called nfsd. + + You may choose to use a user-space NFS server instead, in which + case you can choose N here. + + To export local file systems using NFS, you also need to install + user space programs which can be found in the Linux nfs-utils + package, available from http://linux-nfs.org/. More detail about + the Linux NFS server implementation is available via the + exports(5) man page. + + Below you can choose which versions of the NFS protocol are + available to clients mounting the NFS server on this system. + Support for NFS version 2 (RFC 1094) is always available when + CONFIG_NFSD is selected. + + If unsure, say N. + +config NFSD_V2_ACL + bool + depends on NFSD + +config NFSD_V3 + bool "NFS server support for NFS version 3" + depends on NFSD + help + This option enables support in your system's NFS server for + version 3 of the NFS protocol (RFC 1813). + + If unsure, say Y. + +config NFSD_V3_ACL + bool "NFS server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Solaris NFS servers support an auxiliary NFSv3 ACL protocol that + never became an official part of the NFS version 3 protocol. + This protocol extension allows applications on NFS clients to + manipulate POSIX Access Control Lists on files residing on NFS + servers. NFS servers enforce POSIX ACLs on local files whether + this protocol is available or not. + + This option enables support in your system's NFS server for the + NFSv3 ACL protocol extension allowing NFS clients to manipulate + POSIX ACLs on files exported by your system's NFS server. NFS + clients which support the Solaris NFSv3 ACL protocol can then + access and modify ACLs on your NFS server. + + To store ACLs on your NFS server, you also need to enable ACL- + related CONFIG options for your local file systems of choice. + + If unsure, say N. + +config NFSD_V4 + bool "NFS server support for NFS version 4" + depends on NFSD && PROC_FS + select NFSD_V3 + select FS_POSIX_ACL + select SUNRPC_GSS + select CRYPTO + help + This option enables support in your system's NFS server for + version 4 of the NFS protocol (RFC 3530). + + To export files using NFSv4, you need to install additional user + space programs which can be found in the Linux nfs-utils package, + available from http://linux-nfs.org/. + + If unsure, say N. + +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers. + +config NFSD_FAULT_INJECTION + bool "NFS server manual fault injection" + depends on NFSD_V4 && DEBUG_KERNEL + help + This option enables support for manually injecting faults + into the NFS server. This is intended to be used for + testing error recovery on the NFS client. + + If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 9b118ee2019..af32ef06b4f 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h new file mode 100644 index 00000000000..a986ceb6fd0 --- /dev/null +++ b/fs/nfsd/acl.h @@ -0,0 +1,59 @@ +/* + * Common NFSv4 ACL handling definitions. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFS4_ACL_H +#define LINUX_NFS4_ACL_H + +struct nfs4_acl; +struct svc_fh; +struct svc_rqst; + +/* + * Maximum ACL we'll accept from a client; chosen (somewhat + * arbitrarily) so that kmalloc'ing the ACL shouldn't require a + * high-order allocation. This allows 204 ACEs on x86_64: + */ +#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ + / sizeof(struct nfs4_ace)) + +struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_get_whotype(char *, u32); +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); + +int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl); +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl); + +#endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index d13403e3362..72f44823adb 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -1,15 +1,8 @@ -/* - * linux/fs/nfsd/auth.c - * - * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> - */ +/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/types.h> #include <linux/sched.h> -#include <linux/sunrpc/svc.h> -#include <linux/sunrpc/svcauth.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/export.h> +#include "nfsd.h" +#include "auth.h" int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { @@ -17,7 +10,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) + if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) return f->flags; } return exp->ex_flags; @@ -26,53 +19,72 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { - struct svc_cred cred = rqstp->rq_cred; + struct group_info *rqgi; + struct group_info *gi; + struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); - int ret; + + validate_process_creds(); + + /* discard any old override before preparing the new set */ + revert_creds(get_cred(current->real_cred)); + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; + + rqgi = rqstp->rq_cred.cr_group_info; if (flags & NFSEXP_ALLSQUASH) { - cred.cr_uid = exp->ex_anon_uid; - cred.cr_gid = exp->ex_anon_gid; - cred.cr_group_info = groups_alloc(0); + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; + gi = groups_alloc(0); + if (!gi) + goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { - struct group_info *gi; - if (!cred.cr_uid) - cred.cr_uid = exp->ex_anon_uid; - if (!cred.cr_gid) - cred.cr_gid = exp->ex_anon_gid; - gi = groups_alloc(cred.cr_group_info->ngroups); - if (gi) - for (i = 0; i < cred.cr_group_info->ngroups; i++) { - if (!GROUP_AT(cred.cr_group_info, i)) - GROUP_AT(gi, i) = exp->ex_anon_gid; - else - GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i); - } - cred.cr_group_info = gi; - } else - get_group_info(cred.cr_group_info); - - if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; - else - current->fsuid = exp->ex_anon_uid; - if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; - else - current->fsgid = exp->ex_anon_gid; + if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + new->fsuid = exp->ex_anon_uid; + if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) + new->fsgid = exp->ex_anon_gid; - if (!cred.cr_group_info) - return -ENOMEM; - ret = set_current_groups(cred.cr_group_info); - put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { - current->cap_effective = - cap_drop_nfsd_set(current->cap_effective); + gi = groups_alloc(rqgi->ngroups); + if (!gi) + goto oom; + + for (i = 0; i < rqgi->ngroups; i++) { + if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(rqgi, i); + } } else { - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + gi = get_group_info(rqgi); } - return ret; + + if (uid_eq(new->fsuid, INVALID_UID)) + new->fsuid = exp->ex_anon_uid; + if (gid_eq(new->fsgid, INVALID_GID)) + new->fsgid = exp->ex_anon_gid; + + set_groups(new, gi); + put_group_info(gi); + + if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + validate_process_creds(); + put_cred(override_creds(new)); + put_cred(new); + validate_process_creds(); + return 0; + +oom: + abort_creds(new); + return -ENOMEM; } + diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h index 78b3c0e9382..53325a12ba6 100644 --- a/fs/nfsd/auth.h +++ b/fs/nfsd/auth.h @@ -1,6 +1,5 @@ /* * nfsd-specific authentication stuff. - * uid/gid mapping not yet implemented. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ @@ -8,11 +7,6 @@ #ifndef LINUX_NFSD_AUTH_H #define LINUX_NFSD_AUTH_H -#define nfsd_luid(rq, uid) ((u32)(uid)) -#define nfsd_lgid(rq, gid) ((u32)(gid)) -#define nfsd_ruid(rq, uid) ((u32)(uid)) -#define nfsd_rgid(rq, gid) ((u32)(gid)) - /* * Set the current process's fsuid/fsgid etc to those of the NFS * client user diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h new file mode 100644 index 00000000000..b582f9ab6b2 --- /dev/null +++ b/fs/nfsd/cache.h @@ -0,0 +1,87 @@ +/* + * Request reply cache. This was heavily inspired by the + * implementation in 4.3BSD/4.4BSD. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef NFSCACHE_H +#define NFSCACHE_H + +#include <linux/sunrpc/svc.h> + +/* + * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. + */ +struct svc_cacherep { + struct hlist_node c_hash; + struct list_head c_lru; + + unsigned char c_state, /* unused, inprog, done */ + c_type, /* status, buffer */ + c_secure : 1; /* req came from port < 1024 */ + struct sockaddr_in6 c_addr; + __be32 c_xid; + u32 c_prot; + u32 c_proc; + u32 c_vers; + unsigned int c_len; + __wsum c_csum; + unsigned long c_timestamp; + union { + struct kvec u_vec; + __be32 u_status; + } c_u; +}; + +#define c_replvec c_u.u_vec +#define c_replstat c_u.u_status + +/* cache entry states */ +enum { + RC_UNUSED, + RC_INPROG, + RC_DONE +}; + +/* return values */ +enum { + RC_DROPIT, + RC_REPLY, + RC_DOIT +}; + +/* + * Cache types. + * We may want to add more types one day, e.g. for diropres and + * attrstat replies. Using cache entries with fixed length instead + * of buffer pointers may be more efficient. + */ +enum { + RC_NOCACHE, + RC_REPLSTAT, + RC_REPLBUFF, +}; + +/* + * If requests are retransmitted within this interval, they're dropped. + */ +#define RC_DELAY (HZ/5) + +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + +int nfsd_reply_cache_init(void); +void nfsd_reply_cache_shutdown(void); +int nfsd_cache_lookup(struct svc_rqst *); +void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +int nfsd_reply_cache_stats_open(struct inode *, struct file *); + +#endif /* NFSCACHE_H */ diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h new file mode 100644 index 00000000000..4123551208d --- /dev/null +++ b/fs/nfsd/current_stateid.h @@ -0,0 +1,28 @@ +#ifndef _NFSD4_CURRENT_STATE_H +#define _NFSD4_CURRENT_STATE_H + +#include "state.h" +#include "xdr4.h" + +extern void clear_current_stateid(struct nfsd4_compound_state *cstate); +/* + * functions to set current state id + */ +extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *); +extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *); +extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); + +/* + * functions to consume current state id + */ +extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *); +extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *); +extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *); +extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); +extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *); +extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *); +extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *); + +#endif /* _NFSD4_CURRENT_STATE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8a6f7c924c7..13b85f94d9e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1,7 +1,4 @@ -#define MSNFS /* HACK HACK */ /* - * linux/fs/nfsd/export.c - * * NFS exporting and validation. * * We maintain a list of clients, each of which has a list of @@ -14,36 +11,18 @@ * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> */ -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/stat.h> -#include <linux/in.h> -#include <linux/seq_file.h> -#include <linux/syscalls.h> -#include <linux/rwsem.h> -#include <linux/dcache.h> #include <linux/namei.h> -#include <linux/mount.h> -#include <linux/hash.h> #include <linux/module.h> #include <linux/exportfs.h> +#include <linux/sunrpc/svc_xprt.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/nfsfh.h> -#include <linux/nfsd/syscall.h> -#include <linux/lockd/bind.h> -#include <linux/sunrpc/msg_prot.h> -#include <linux/sunrpc/gss_api.h> +#include "nfsd.h" +#include "nfsfh.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT -typedef struct auth_domain svc_client; -typedef struct svc_export svc_export; - -static void exp_do_unexport(svc_export *unexp); -static int exp_verify_string(char *cp, int max); - /* * We have two caches. * One maps client+vfsmnt+dentry to export options - the export map @@ -56,7 +35,6 @@ static int exp_verify_string(char *cp, int max); #define EXPKEY_HASHBITS 8 #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static struct cache_head *expkey_table[EXPKEY_HASHMAX]; static void expkey_put(struct kref *ref) { @@ -84,13 +62,13 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); -static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); -static struct cache_detail svc_expkey_cache; +static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, + struct svc_expkey *old); +static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) { - /* client fsidtype fsid [path] */ + /* client fsidtype fsid expiry [path] */ char *buf; int len; struct auth_domain *dom = NULL; @@ -98,15 +76,16 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) int fsidtype; char *ep; struct svc_expkey key; - struct svc_expkey *ek; + struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mesg[mlen - 1] != '\n') return -EINVAL; mesg[mlen-1] = 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); err = -ENOMEM; - if (!buf) goto out; + if (!buf) + goto out; err = -EINVAL; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) @@ -143,41 +122,39 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) key.ek_fsidtype = fsidtype; memcpy(key.ek_fsid, buf, len); - ek = svc_expkey_lookup(&key); + ek = svc_expkey_lookup(cd, &key); err = -ENOMEM; if (!ek) goto out; /* now we want a pathname, or empty meaning NEGATIVE */ err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len < 0) goto out; dprintk("Path seems to be <%s>\n", buf); err = 0; if (len == 0) { set_bit(CACHE_NEGATIVE, &key.h.flags); - ek = svc_expkey_update(&key, ek); - if (ek) - cache_put(&ek->h, &svc_expkey_cache); - else err = -ENOMEM; + ek = svc_expkey_update(cd, &key, ek); + if (!ek) + err = -ENOMEM; } else { - struct nameidata nd; - err = path_lookup(buf, 0, &nd); + err = kern_path(buf, 0, &key.ek_path); if (err) goto out; dprintk("Found the path %s\n", buf); - key.ek_path = nd.path; - ek = svc_expkey_update(&key, ek); - if (ek) - cache_put(&ek->h, &svc_expkey_cache); - else + ek = svc_expkey_update(cd, &key, ek); + if (!ek) err = -ENOMEM; - path_put(&nd.path); + path_put(&key.ek_path); } cache_flush(); out: + if (ek) + cache_put(&ek->h, cd); if (dom) auth_domain_put(dom); kfree(buf); @@ -253,10 +230,9 @@ static struct cache_head *expkey_alloc(void) return NULL; } -static struct cache_detail svc_expkey_cache = { +static struct cache_detail svc_expkey_cache_template = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, - .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, .cache_request = expkey_request, @@ -268,10 +244,9 @@ static struct cache_detail svc_expkey_cache = { .alloc = expkey_alloc, }; -static struct svc_expkey * -svc_expkey_lookup(struct svc_expkey *item) +static int +svc_expkey_hash(struct svc_expkey *item) { - struct cache_head *ch; int hash = item->ek_fsidtype; char * cp = (char*)item->ek_fsid; int len = key_len(item->ek_fsidtype); @@ -279,9 +254,16 @@ svc_expkey_lookup(struct svc_expkey *item) hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); hash &= EXPKEY_HASHMASK; + return hash; +} - ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, - hash); +static struct svc_expkey * +svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *item) +{ + struct cache_head *ch; + int hash = svc_expkey_hash(item); + + ch = sunrpc_cache_lookup(cd, &item->h, hash); if (ch) return container_of(ch, struct svc_expkey, h); else @@ -289,19 +271,13 @@ svc_expkey_lookup(struct svc_expkey *item) } static struct svc_expkey * -svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) +svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, + struct svc_expkey *old) { struct cache_head *ch; - int hash = new->ek_fsidtype; - char * cp = (char*)new->ek_fsid; - int len = key_len(new->ek_fsidtype); - - hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); - hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS); - hash &= EXPKEY_HASHMASK; + int hash = svc_expkey_hash(new); - ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, - &old->h, hash); + ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct svc_expkey, h); else @@ -311,19 +287,22 @@ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) -#define EXPORT_HASHMASK (EXPORT_HASHMAX -1) - -static struct cache_head *export_table[EXPORT_HASHMAX]; static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) { + struct nfsd4_fs_location *locations = fsloc->locations; int i; + if (!locations) + return; + for (i = 0; i < fsloc->locations_count; i++) { - kfree(fsloc->locations[i].path); - kfree(fsloc->locations[i].hosts); + kfree(locations[i].path); + kfree(locations[i].hosts); } - kfree(fsloc->locations); + + kfree(locations); + fsloc->locations = NULL; } static void svc_export_put(struct kref *ref) @@ -331,8 +310,8 @@ static void svc_export_put(struct kref *ref) struct svc_export *exp = container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); - kfree(exp->ex_pathname); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -359,16 +338,25 @@ static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); -static int check_export(struct inode *inode, int flags, unsigned char *uuid) +static int check_export(struct inode *inode, int *flags, unsigned char *uuid) { - /* We currently export only dirs and regular files. - * This is what umountd does. + /* + * We currently export only dirs, regular files, and (for v4 + * pseudoroot) symlinks. */ if (!S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode) && !S_ISREG(inode->i_mode)) return -ENOTDIR; + /* + * Mountd should never pass down a writeable V4ROOT export, but, + * just to make sure: + */ + if (*flags & NFSEXP_V4ROOT) + *flags |= NFSEXP_READONLY; + /* There are two requirements on a filesystem to be exportable. * 1: We must be able to identify the filesystem from a number. * either a device number (so FS_REQUIRES_DEV needed) @@ -377,7 +365,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid) * This means that s_export_op must be set. */ if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && - !(flags & NFSEXP_FSID) && + !(*flags & NFSEXP_FSID) && uuid == NULL) { dprintk("exp_export: export of non-dev fs without fsid\n"); return -EINVAL; @@ -401,8 +389,12 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int len; int migrated, i, err; + /* more than one fsloc */ + if (fsloc->locations) + return -EINVAL; + /* listsize */ - err = get_int(mesg, &fsloc->locations_count); + err = get_uint(mesg, &fsloc->locations_count); if (err) return err; if (fsloc->locations_count > MAX_FS_LOCATIONS) @@ -450,31 +442,31 @@ out_free_all: static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { - int listsize, err; struct exp_flavor_info *f; + u32 listsize; + int err; + + /* more than one secinfo */ + if (exp->ex_nflavors) + return -EINVAL; - err = get_int(mesg, &listsize); + err = get_uint(mesg, &listsize); if (err) return err; - if (listsize < 0 || listsize > MAX_SECINFO_LIST) + if (listsize > MAX_SECINFO_LIST) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { - err = get_int(mesg, &f->pseudoflavor); + err = get_uint(mesg, &f->pseudoflavor); if (err) return err; /* - * Just a quick sanity check; we could also try to check - * whether this pseudoflavor is supported, but at worst - * an unsupported pseudoflavor on the export would just - * be a pseudoflavor that won't match the flavor of any - * authenticated request. The administrator will - * probably discover the problem when someone fails to - * authenticate. + * XXX: It would be nice to also check whether this + * pseudoflavor is supported, so we can discover the + * problem at export time instead of when a client fails + * to authenticate. */ - if (f->pseudoflavor < 0) - return -EINVAL; - err = get_int(mesg, &f->flags); + err = get_uint(mesg, &f->flags); if (err) return err; /* Only some flags are allowed to differ between flavors: */ @@ -492,6 +484,27 @@ static inline int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif +static inline int +uuid_parse(char **mesg, char *buf, unsigned char **puuid) +{ + int len; + + /* more than one uuid */ + if (*puuid) + return -EINVAL; + + /* expect a 16 byte uuid encoded as \xXXXX... */ + len = qword_get(mesg, buf, PAGE_SIZE); + if (len != EX_UUID_LEN) + return -EINVAL; + + *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL); + if (*puuid == NULL) + return -ENOMEM; + + return 0; +} + static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) { /* client path expiry [flags anonuid anongid fsid] */ @@ -499,35 +512,22 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) int len; int err; struct auth_domain *dom = NULL; - struct nameidata nd; - struct svc_export exp, *expp; + struct svc_export exp = {}, *expp; int an_int; - nd.path.dentry = NULL; - exp.ex_pathname = NULL; - - /* fs locations */ - exp.ex_fslocs.locations = NULL; - exp.ex_fslocs.locations_count = 0; - exp.ex_fslocs.migrated = 0; - - exp.ex_uuid = NULL; - - /* secinfo */ - exp.ex_nflavors = 0; - if (mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - err = -ENOMEM; - if (!buf) goto out; + if (!buf) + return -ENOMEM; /* client */ - len = qword_get(&mesg, buf, PAGE_SIZE); err = -EINVAL; - if (len <= 0) goto out; + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out; err = -ENOENT; dom = auth_domain_find(buf); @@ -536,25 +536,21 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* path */ err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) - goto out; - err = path_lookup(buf, 0, &nd); - if (err) goto out_no_path; + if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out1; + + err = kern_path(buf, 0, &exp.ex_path); + if (err) + goto out1; - exp.h.flags = 0; exp.ex_client = dom; - exp.ex_path.mnt = nd.path.mnt; - exp.ex_path.dentry = nd.path.dentry; - exp.ex_pathname = kstrdup(buf, GFP_KERNEL); - err = -ENOMEM; - if (!exp.ex_pathname) - goto out; + exp.cd = cd; /* expiry */ err = -EINVAL; exp.h.expiry_time = get_expiry(&mesg); if (exp.h.expiry_time == 0) - goto out; + goto out3; /* flags */ err = get_int(&mesg, &an_int); @@ -562,39 +558,34 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = 0; set_bit(CACHE_NEGATIVE, &exp.h.flags); } else { - if (err || an_int < 0) goto out; + if (err || an_int < 0) + goto out3; exp.ex_flags= an_int; /* anon uid */ err = get_int(&mesg, &an_int); - if (err) goto out; - exp.ex_anon_uid= an_int; + if (err) + goto out3; + exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); /* anon gid */ err = get_int(&mesg, &an_int); - if (err) goto out; - exp.ex_anon_gid= an_int; + if (err) + goto out3; + exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); /* fsid */ err = get_int(&mesg, &an_int); - if (err) goto out; + if (err) + goto out3; exp.ex_fsid = an_int; while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); - else if (strcmp(buf, "uuid") == 0) { - /* expect a 16 byte uuid encoded as \xXXXX... */ - len = qword_get(&mesg, buf, PAGE_SIZE); - if (len != 16) - err = -EINVAL; - else { - exp.ex_uuid = - kmemdup(buf, 16, GFP_KERNEL); - if (exp.ex_uuid == NULL) - err = -ENOMEM; - } - } else if (strcmp(buf, "secinfo") == 0) + else if (strcmp(buf, "uuid") == 0) + err = uuid_parse(&mesg, buf, &exp.ex_uuid); + else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else /* quietly ignore unknown words and anything @@ -603,12 +594,33 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) */ break; if (err) - goto out; + goto out4; } - err = check_export(nd.path.dentry->d_inode, exp.ex_flags, + err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags, exp.ex_uuid); - if (err) goto out; + if (err) + goto out4; + /* + * No point caching this if it would immediately expire. + * Also, this protects exportfs's dummy export from the + * anon_uid/anon_gid checks: + */ + if (exp.h.expiry_time < seconds_since_boot()) + goto out4; + /* + * For some reason exportfs has been passing down an + * invalid (-1) uid & gid on the "dummy" export which it + * uses to test export support. To make sure exportfs + * sees errors from check_export we therefore need to + * delay these checks till after check_export: + */ + err = -EINVAL; + if (!uid_valid(exp.ex_anon_uid)) + goto out4; + if (!gid_valid(exp.ex_anon_gid)) + goto out4; + err = 0; } expp = svc_export_lookup(&exp); @@ -621,21 +633,20 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = -ENOMEM; else exp_put(expp); - out: +out4: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); - kfree(exp.ex_pathname); - if (nd.path.dentry) - path_put(&nd.path); - out_no_path: - if (dom) - auth_domain_put(dom); +out3: + path_put(&exp.ex_path); +out1: + auth_domain_put(dom); +out: kfree(buf); return err; } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs); static void show_secinfo(struct seq_file *m, struct svc_export *exp); static int svc_export_show(struct seq_file *m, @@ -660,7 +671,7 @@ static int svc_export_show(struct seq_file *m, if (exp->ex_uuid) { int i; seq_puts(m, ",uuid="); - for (i=0; i<16; i++) { + for (i = 0; i < EX_UUID_LEN; i++) { if ((i&3) == 0 && i) seq_putc(m, ':'); seq_printf(m, "%02x", exp->ex_uuid[i]); @@ -689,10 +700,11 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_client = item->ex_client; new->ex_path.dentry = dget(item->ex_path.dentry); new->ex_path.mnt = mntget(item->ex_path.mnt); - new->ex_pathname = NULL; new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; + new->cd = item->cd; } static void export_update(struct cache_head *cnew, struct cache_head *citem) @@ -707,8 +719,6 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) new->ex_fsid = item->ex_fsid; new->ex_uuid = item->ex_uuid; item->ex_uuid = NULL; - new->ex_pathname = item->ex_pathname; - item->ex_pathname = NULL; new->ex_fslocs.locations = item->ex_fslocs.locations; item->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; @@ -730,10 +740,9 @@ static struct cache_head *svc_export_alloc(void) return NULL; } -struct cache_detail svc_export_cache = { +static struct cache_detail svc_export_cache_template = { .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, - .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, .cache_request = svc_export_request, @@ -745,17 +754,24 @@ struct cache_detail svc_export_cache = { .alloc = svc_export_alloc, }; -static struct svc_export * -svc_export_lookup(struct svc_export *exp) +static int +svc_export_hash(struct svc_export *exp) { - struct cache_head *ch; int hash; + hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); + return hash; +} - ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, - hash); +static struct svc_export * +svc_export_lookup(struct svc_export *exp) +{ + struct cache_head *ch; + int hash = svc_export_hash(exp); + + ch = sunrpc_cache_lookup(exp->cd, &exp->h, hash); if (ch) return container_of(ch, struct svc_export, h); else @@ -766,14 +782,9 @@ static struct svc_export * svc_export_update(struct svc_export *new, struct svc_export *old) { struct cache_head *ch; - int hash; - hash = hash_ptr(old->ex_client, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS); - hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS); + int hash = svc_export_hash(old); - ch = sunrpc_cache_update(&svc_export_cache, &new->h, - &old->h, - hash); + ch = sunrpc_cache_update(old->cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct svc_export, h); else @@ -782,7 +793,8 @@ svc_export_update(struct svc_export *new, struct svc_export *old) static struct svc_expkey * -exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) +exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, + u32 *fsidv, struct cache_req *reqp) { struct svc_expkey key, *ek; int err; @@ -794,69 +806,18 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) key.ek_fsidtype = fsid_type; memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - ek = svc_expkey_lookup(&key); + ek = svc_expkey_lookup(cd, &key); if (ek == NULL) return ERR_PTR(-ENOMEM); - err = cache_check(&svc_expkey_cache, &ek->h, reqp); + err = cache_check(cd, &ek->h, reqp); if (err) return ERR_PTR(err); return ek; } -static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, - struct svc_export *exp) -{ - struct svc_expkey key, *ek; - - key.ek_client = clp; - key.ek_fsidtype = fsid_type; - memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_path = exp->ex_path; - key.h.expiry_time = NEVER; - key.h.flags = 0; - - ek = svc_expkey_lookup(&key); - if (ek) - ek = svc_expkey_update(&key,ek); - if (ek) { - cache_put(&ek->h, &svc_expkey_cache); - return 0; - } - return -ENOMEM; -} - -/* - * Find the client's export entry matching xdev/xino. - */ -static inline struct svc_expkey * -exp_get_key(svc_client *clp, dev_t dev, ino_t ino) -{ - u32 fsidv[3]; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_DEV, fsidv, NULL); - } - mk_fsid(FSID_ENCODE_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_ENCODE_DEV, fsidv, NULL); -} - -/* - * Find the client's export entry matching fsid - */ -static inline struct svc_expkey * -exp_get_fsid_key(svc_client *clp, int fsid) -{ - u32 fsidv[2]; - - mk_fsid(FSID_NUM, fsidv, 0, 0, fsid, NULL); - - return exp_find_key(clp, FSID_NUM, fsidv, NULL); -} - -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export * +exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, + const struct path *path, struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -865,13 +826,13 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, return ERR_PTR(-ENOENT); key.ex_client = clp; - key.ex_path.mnt = mnt; - key.ex_path.dentry = dentry; + key.ex_path = *path; + key.cd = cd; exp = svc_export_lookup(&key); if (exp == NULL) return ERR_PTR(-ENOMEM); - err = cache_check(&svc_export_cache, &exp->h, reqp); + err = cache_check(cd, &exp->h, reqp); if (err) return ERR_PTR(err); return exp; @@ -880,284 +841,24 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export * +exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path) { - svc_export *exp; - - dget(dentry); - exp = exp_get_by_name(clp, mnt, dentry, reqp); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = exp_get_by_name(clp, mnt, dentry, reqp); + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = exp_get_by_name(cd, clp, path, NULL); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } -/* - * Hashtable locking. Write locks are placed only by user processes - * wanting to modify export information. - * Write locking only done in this file. Read locking - * needed externally. - */ - -static DECLARE_RWSEM(hash_sem); - -void -exp_readlock(void) -{ - down_read(&hash_sem); -} - -static inline void -exp_writelock(void) -{ - down_write(&hash_sem); -} - -void -exp_readunlock(void) -{ - up_read(&hash_sem); -} - -static inline void -exp_writeunlock(void) -{ - up_write(&hash_sem); -} - -static void exp_fsid_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return; - - ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); - if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; - cache_put(&ek->h, &svc_expkey_cache); - } - svc_expkey_cache.nextcheck = get_seconds(); -} - -static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) -{ - u32 fsid[2]; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return 0; - - mk_fsid(FSID_NUM, fsid, 0, 0, exp->ex_fsid, NULL); - return exp_set_key(clp, FSID_NUM, fsid, exp); -} - -static int exp_hash(struct auth_domain *clp, struct svc_export *exp) -{ - u32 fsid[2]; - struct inode *inode = exp->ex_path.dentry->d_inode; - dev_t dev = inode->i_sb->s_dev; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_DEV, fsid, exp); - } - mk_fsid(FSID_ENCODE_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_ENCODE_DEV, fsid, exp); -} - -static void exp_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - struct inode *inode = exp->ex_path.dentry->d_inode; - - ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); - if (!IS_ERR(ek)) { - ek->h.expiry_time = get_seconds()-1; - cache_put(&ek->h, &svc_expkey_cache); - } - svc_expkey_cache.nextcheck = get_seconds(); -} - -/* - * Export a file system. - */ -int -exp_export(struct nfsctl_export *nxp) -{ - svc_client *clp; - struct svc_export *exp = NULL; - struct svc_export new; - struct svc_expkey *fsid_key = NULL; - struct nameidata nd; - int err; - - /* Consistency check */ - err = -EINVAL; - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - goto out; - - dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n", - nxp->ex_client, nxp->ex_path, - (unsigned)nxp->ex_dev, (long)nxp->ex_ino, - nxp->ex_flags); - - /* Try to lock the export table for update */ - exp_writelock(); - - /* Look up client info */ - if (!(clp = auth_domain_find(nxp->ex_client))) - goto out_unlock; - - - /* Look up the dentry */ - err = path_lookup(nxp->ex_path, 0, &nd); - if (err) - goto out_unlock; - err = -EINVAL; - - exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); - - memset(&new, 0, sizeof(new)); - - /* must make sure there won't be an ex_fsid clash */ - if ((nxp->ex_flags & NFSEXP_FSID) && - (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && - fsid_key->ek_path.mnt && - (fsid_key->ek_path.mnt != nd.path.mnt || - fsid_key->ek_path.dentry != nd.path.dentry)) - goto finish; - - if (!IS_ERR(exp)) { - /* just a flags/id/fsid update */ - - exp_fsid_unhash(exp); - exp->ex_flags = nxp->ex_flags; - exp->ex_anon_uid = nxp->ex_anon_uid; - exp->ex_anon_gid = nxp->ex_anon_gid; - exp->ex_fsid = nxp->ex_dev; - - err = exp_fsid_hash(clp, exp); - goto finish; - } - - err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); - if (err) goto finish; - - err = -ENOMEM; - - dprintk("nfsd: creating export entry %p for client %p\n", exp, clp); - - new.h.expiry_time = NEVER; - new.h.flags = 0; - new.ex_pathname = kstrdup(nxp->ex_path, GFP_KERNEL); - if (!new.ex_pathname) - goto finish; - new.ex_client = clp; - new.ex_path = nd.path; - new.ex_flags = nxp->ex_flags; - new.ex_anon_uid = nxp->ex_anon_uid; - new.ex_anon_gid = nxp->ex_anon_gid; - new.ex_fsid = nxp->ex_dev; - - exp = svc_export_lookup(&new); - if (exp) - exp = svc_export_update(&new, exp); - - if (!exp) - goto finish; - - if (exp_hash(clp, exp) || - exp_fsid_hash(clp, exp)) { - /* failed to create at least one index */ - exp_do_unexport(exp); - cache_flush(); - } else - err = 0; -finish: - kfree(new.ex_pathname); - if (exp) - exp_put(exp); - if (fsid_key && !IS_ERR(fsid_key)) - cache_put(&fsid_key->h, &svc_expkey_cache); - if (clp) - auth_domain_put(clp); - path_put(&nd.path); -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Unexport a file system. The export entry has already - * been removed from the client's list of exported fs's. - */ -static void -exp_do_unexport(svc_export *unexp) -{ - unexp->h.expiry_time = get_seconds()-1; - svc_export_cache.nextcheck = get_seconds(); - exp_unhash(unexp); - exp_fsid_unhash(unexp); -} - - -/* - * unexport syscall. - */ -int -exp_unexport(struct nfsctl_export *nxp) -{ - struct auth_domain *dom; - svc_export *exp; - struct nameidata nd; - int err; - - /* Consistency check */ - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - return -EINVAL; - - exp_writelock(); - - err = -EINVAL; - dom = auth_domain_find(nxp->ex_client); - if (!dom) { - dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client); - goto out_unlock; - } - - err = path_lookup(nxp->ex_path, 0, &nd); - if (err) - goto out_domain; - err = -EINVAL; - exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); - path_put(&nd.path); - if (IS_ERR(exp)) - goto out_domain; - - exp_do_unexport(exp); - exp_put(exp); - err = 0; - -out_domain: - auth_domain_put(dom); - cache_flush(); -out_unlock: - exp_writeunlock(); - return err; -} /* * Obtain the root fh on behalf of a client. @@ -1165,26 +866,29 @@ out_unlock: * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) +exp_rootfh(struct net *net, struct auth_domain *clp, char *name, + struct knfsd_fh *f, int maxsize) { struct svc_export *exp; - struct nameidata nd; + struct path path; struct inode *inode; struct svc_fh fh; int err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; err = -EPERM; /* NB: we probably ought to check that it's NUL-terminated */ - if (path_lookup(path, 0, &nd)) { - printk("nfsd: exp_rootfh path not found %s", path); + if (kern_path(name, 0, &path)) { + printk("nfsd: exp_rootfh path not found %s", name); return err; } - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", - path, nd.path.dentry, clp->name, + name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); + exp = exp_parent(cd, clp, &path); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -1194,7 +898,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - if (fh_compose(&fh, exp, nd.path.dentry, NULL)) + if (fh_compose(&fh, exp, path.dentry, NULL)) err = -EINVAL; else err = 0; @@ -1202,20 +906,22 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) fh_put(&fh); exp_put(exp); out: - path_put(&nd.path); + path_put(&path); return err; } -static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, +static struct svc_export *exp_find(struct cache_detail *cd, + struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) { struct svc_export *exp; - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); + struct nfsd_net *nn = net_generic(cd->net, nfsd_net_id); + struct svc_expkey *ek = exp_find_key(nn->svc_expkey_cache, clp, fsid_type, fsidv, reqp); if (IS_ERR(ek)) return ERR_CAST(ek); - exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); - cache_put(&ek->h, &svc_expkey_cache); + exp = exp_get_by_name(cd, clp, &ek->ek_path, reqp); + cache_put(&ek->h, nn->svc_expkey_cache); if (IS_ERR(exp)) return ERR_CAST(exp); @@ -1232,13 +938,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) return 0; /* ip-address based client; check sec= export option: */ for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) + if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) return 0; } /* defaults in absence of sec= options: */ if (exp->ex_nflavors == 0) { - if (rqstp->rq_flavor == RPC_AUTH_NULL || - rqstp->rq_flavor == RPC_AUTH_UNIX) + if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || + rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) return 0; } return nfserr_wrongsec; @@ -1254,17 +960,17 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) * use exp_get_by_name() or exp_find(). */ struct svc_export * -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, - &rqstp->rq_chandle); + exp = exp_get_by_name(cd, rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1276,8 +982,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, - &rqstp->rq_chandle); + gssexp = exp_get_by_name(cd, rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1289,12 +994,15 @@ struct svc_export * rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct cache_detail *cd = nn->svc_export_cache; if (rqstp->rq_client == NULL) goto gss; /* First try the auth_unix client: */ - exp = exp_find(rqstp->rq_client, fsid_type, fsidv, &rqstp->rq_chandle); + exp = exp_find(cd, rqstp->rq_client, fsid_type, + fsidv, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1306,7 +1014,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_find(rqstp->rq_gssclient, fsid_type, fsidv, + gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; @@ -1316,24 +1024,29 @@ gss: } struct svc_export * -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) { - struct svc_export *exp; + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); + } + dput(path->dentry); + path->dentry = saved; + return exp; +} - dget(dentry); - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); +struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) +{ + u32 fsidv[2]; - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; + mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); - } - dput(dentry); - return exp; + return rqst_exp_find(rqstp, FSID_NUM, fsidv); } /* @@ -1346,18 +1059,11 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct svc_export *exp; __be32 rv; - u32 fsidv[2]; - - mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); + exp = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); - if (rv) - goto out; - rv = check_nfsd_access(exp, rqstp); -out: exp_put(exp); return rv; } @@ -1365,14 +1071,15 @@ out: /* Iterator */ static void *e_start(struct seq_file *m, loff_t *pos) - __acquires(svc_export_cache.hash_lock) + __acquires(((struct cache_detail *)m->private)->hash_lock) { loff_t n = *pos; unsigned hash, export; struct cache_head *ch; - - exp_readlock(); - read_lock(&svc_export_cache.hash_lock); + struct cache_detail *cd = m->private; + struct cache_head **export_table = cd->hash_table; + + read_lock(&cd->hash_lock); if (!n--) return SEQ_START_TOKEN; hash = n >> 32; @@ -1397,6 +1104,8 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); + struct cache_detail *cd = m->private; + struct cache_head **export_table = cd->hash_table; if (p == SEQ_START_TOKEN) hash = 0; @@ -1419,10 +1128,11 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos) } static void e_stop(struct seq_file *m, void *p) - __releases(svc_export_cache.hash_lock) + __releases(((struct cache_detail *)m->private)->hash_lock) { - read_unlock(&svc_export_cache.hash_lock); - exp_readunlock(); + struct cache_detail *cd = m->private; + + read_unlock(&cd->hash_lock); } static struct flags { @@ -1439,9 +1149,7 @@ static struct flags { { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, -#ifdef MSNFS - { NFSEXP_MSNFS, {"msnfs", ""}}, -#endif + { NFSEXP_V4ROOT, {"v4root", ""}}, { 0, {"", ""}} }; @@ -1465,37 +1173,57 @@ static void show_secinfo_flags(struct seq_file *m, int flags) show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); } +static bool secinfo_flags_equal(int f, int g) +{ + f &= NFSEXP_SECINFO_FLAGS; + g &= NFSEXP_SECINFO_FLAGS; + return f == g; +} + +static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end) +{ + int flags; + + flags = (*fp)->flags; + seq_printf(m, ",sec=%d", (*fp)->pseudoflavor); + (*fp)++; + while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) { + seq_printf(m, ":%d", (*fp)->pseudoflavor); + (*fp)++; + } + return flags; +} + static void show_secinfo(struct seq_file *m, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; - int lastflags = 0, first = 0; + int flags; if (exp->ex_nflavors == 0) return; - for (f = exp->ex_flavors; f < end; f++) { - if (first || f->flags != lastflags) { - if (!first) - show_secinfo_flags(m, lastflags); - seq_printf(m, ",sec=%d", f->pseudoflavor); - lastflags = f->flags; - } else { - seq_printf(m, ":%d", f->pseudoflavor); - } + f = exp->ex_flavors; + flags = show_secinfo_run(m, &f, end); + if (!secinfo_flags_equal(flags, exp->ex_flags)) + show_secinfo_flags(m, flags); + while (f != end) { + flags = show_secinfo_run(m, &f, end); + show_secinfo_flags(m, flags); } - show_secinfo_flags(m, lastflags); } static void exp_flags(struct seq_file *m, int flag, int fsid, - uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) + kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc) { show_expflags(m, flag, NFSEXP_ALLFLAGS); if (flag & NFSEXP_FSID) seq_printf(m, ",fsid=%d", fsid); - if (anonu != (uid_t)-2 && anonu != (0x10000-2)) - seq_printf(m, ",anonuid=%u", anonu); - if (anong != (gid_t)-2 && anong != (0x10000-2)) - seq_printf(m, ",anongid=%u", anong); + if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) && + !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu)); + if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) && + !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2))) + seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong)); if (fsloc && fsloc->locations_count > 0) { char *loctype = (fsloc->migrated) ? "refer" : "replicas"; int i; @@ -1517,6 +1245,7 @@ static int e_show(struct seq_file *m, void *p) { struct cache_head *cp = p; struct svc_export *exp = container_of(cp, struct svc_export, h); + struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) { seq_puts(m, "# Version 1.1\n"); @@ -1525,13 +1254,13 @@ static int e_show(struct seq_file *m, void *p) } cache_get(&exp->h); - if (cache_check(&svc_export_cache, &exp->h, NULL)) + if (cache_check(cd, &exp->h, NULL)) return 0; - cache_put(&exp->h, &svc_export_cache); - return svc_export_show(m, &svc_export_cache, cp); + exp_put(exp); + return svc_export_show(m, cd, cp); } -struct seq_operations nfs_exports_op = { +const struct seq_operations nfs_exports_op = { .start = e_start, .next = e_next, .stop = e_stop, @@ -1539,139 +1268,69 @@ struct seq_operations nfs_exports_op = { }; /* - * Add or modify a client. - * Change requests may involve the list of host addresses. The list of - * exports and possibly existing uid maps are left untouched. - */ -int -exp_addclient(struct nfsctl_client *ncp) -{ - struct auth_domain *dom; - int i, err; - - /* First, consistency check. */ - err = -EINVAL; - if (! exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; - if (ncp->cl_naddr > NFSCLNT_ADDRMAX) - goto out; - - /* Lock the hashtable */ - exp_writelock(); - - dom = unix_domain_find(ncp->cl_ident); - - err = -ENOMEM; - if (!dom) - goto out_unlock; - - /* Insert client into hashtable. */ - for (i = 0; i < ncp->cl_naddr; i++) - auth_unix_add_addr(ncp->cl_addrlist[i], dom); - - auth_unix_forget_old(dom); - auth_domain_put(dom); - - err = 0; - -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Delete a client given an identifier. + * Initialize the exports module. */ int -exp_delclient(struct nfsctl_client *ncp) +nfsd_export_init(struct net *net) { - int err; - struct auth_domain *dom; + int rv; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - err = -EINVAL; - if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; + dprintk("nfsd: initializing export module (net: %p).\n", net); - /* Lock the hashtable */ - exp_writelock(); + nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net); + if (IS_ERR(nn->svc_export_cache)) + return PTR_ERR(nn->svc_export_cache); + rv = cache_register_net(nn->svc_export_cache, net); + if (rv) + goto destroy_export_cache; - dom = auth_domain_find(ncp->cl_ident); - /* just make sure that no addresses work - * and that it will expire soon - */ - if (dom) { - err = auth_unix_forget_old(dom); - auth_domain_put(dom); + nn->svc_expkey_cache = cache_create_net(&svc_expkey_cache_template, net); + if (IS_ERR(nn->svc_expkey_cache)) { + rv = PTR_ERR(nn->svc_expkey_cache); + goto unregister_export_cache; } - - exp_writeunlock(); -out: - return err; -} - -/* - * Verify that string is non-empty and does not exceed max length. - */ -static int -exp_verify_string(char *cp, int max) -{ - int i; - - for (i = 0; i < max; i++) - if (!cp[i]) - return i; - cp[i] = 0; - printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); + rv = cache_register_net(nn->svc_expkey_cache, net); + if (rv) + goto destroy_expkey_cache; return 0; -} -/* - * Initialize the exports module. - */ -int -nfsd_export_init(void) -{ - int rv; - dprintk("nfsd: initializing export module.\n"); - - rv = cache_register(&svc_export_cache); - if (rv) - return rv; - rv = cache_register(&svc_expkey_cache); - if (rv) - cache_unregister(&svc_export_cache); +destroy_expkey_cache: + cache_destroy_net(nn->svc_expkey_cache, net); +unregister_export_cache: + cache_unregister_net(nn->svc_export_cache, net); +destroy_export_cache: + cache_destroy_net(nn->svc_export_cache, net); return rv; - } /* * Flush exports table - called when last nfsd thread is killed */ void -nfsd_export_flush(void) +nfsd_export_flush(struct net *net) { - exp_writelock(); - cache_purge(&svc_expkey_cache); - cache_purge(&svc_export_cache); - exp_writeunlock(); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + cache_purge(nn->svc_expkey_cache); + cache_purge(nn->svc_export_cache); } /* * Shutdown the exports module. */ void -nfsd_export_shutdown(void) +nfsd_export_shutdown(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("nfsd: shutting down export module.\n"); - - exp_writelock(); + dprintk("nfsd: shutting down export module (net: %p).\n", net); - cache_unregister(&svc_expkey_cache); - cache_unregister(&svc_export_cache); - svcauth_unix_purge(); + cache_unregister_net(nn->svc_expkey_cache, net); + cache_unregister_net(nn->svc_export_cache, net); + cache_destroy_net(nn->svc_expkey_cache, net); + cache_destroy_net(nn->svc_export_cache, net); + svcauth_unix_purge(net); - exp_writeunlock(); - dprintk("nfsd: export shutdown complete.\n"); + dprintk("nfsd: export shutdown complete (net: %p).\n", net); } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h new file mode 100644 index 00000000000..cfeea85c5be --- /dev/null +++ b/fs/nfsd/export.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef NFSD_EXPORT_H +#define NFSD_EXPORT_H + +#include <linux/sunrpc/cache.h> +#include <uapi/linux/nfsd/export.h> + +struct knfsd_fh; +struct svc_fh; +struct svc_rqst; + +/* + * FS Locations + */ + +#define MAX_FS_LOCATIONS 128 + +struct nfsd4_fs_location { + char *hosts; /* colon separated list of hosts */ + char *path; /* slash separated list of path components */ +}; + +struct nfsd4_fs_locations { + uint32_t locations_count; + struct nfsd4_fs_location *locations; +/* If we're not actually serving this data ourselves (only providing a + * list of replicas that do serve it) then we set "migrated": */ + int migrated; +}; + +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the foreseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 +#define EX_UUID_LEN 16 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + +struct svc_export { + struct cache_head h; + struct auth_domain * ex_client; + int ex_flags; + struct path ex_path; + kuid_t ex_anon_uid; + kgid_t ex_anon_gid; + int ex_fsid; + unsigned char * ex_uuid; /* 16 byte fsid */ + struct nfsd4_fs_locations ex_fslocs; + uint32_t ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; + struct cache_detail *cd; +}; + +/* an "export key" (expkey) maps a filehandlefragement to an + * svc_export for a given client. There can be several per export, + * for the different fsid types. + */ +struct svc_expkey { + struct cache_head h; + + struct auth_domain * ek_client; + int ek_fsidtype; + u32 ek_fsid[6]; + + struct path ek_path; +}; + +#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) +#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) +#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) + +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); + +/* + * Function declarations + */ +int nfsd_export_init(struct net *); +void nfsd_export_shutdown(struct net *); +void nfsd_export_flush(struct net *); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct path *); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct path *); +struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); +int exp_rootfh(struct net *, struct auth_domain *, + char *path, struct knfsd_fh *, int maxsize); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); + +static inline void exp_put(struct svc_export *exp) +{ + cache_put(&exp->h, exp->cd); +} + +static inline void exp_get(struct svc_export *exp) +{ + cache_get(&exp->h); +} +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); + +#endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c new file mode 100644 index 00000000000..2ed05c3cd43 --- /dev/null +++ b/fs/nfsd/fault_inject.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> + * + * Uses debugfs to create fault injection points for client testing + */ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/nsproxy.h> +#include <linux/sunrpc/addr.h> +#include <asm/uaccess.h> + +#include "state.h" +#include "netns.h" + +struct nfsd_fault_inject_op { + char *file; + u64 (*forget)(struct nfs4_client *, u64); + u64 (*print)(struct nfs4_client *, u64); +}; + +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .forget = nfsd_forget_client, + .print = nfsd_print_client, + }, + { + .file = "forget_locks", + .forget = nfsd_forget_client_locks, + .print = nfsd_print_client_locks, + }, + { + .file = "forget_openowners", + .forget = nfsd_forget_client_openowners, + .print = nfsd_print_client_openowners, + }, + { + .file = "forget_delegations", + .forget = nfsd_forget_client_delegations, + .print = nfsd_print_client_delegations, + }, + { + .file = "recall_delegations", + .forget = nfsd_recall_client_delegations, + .print = nfsd_print_client_delegations, + }, +}; + +static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); +static struct dentry *debug_dir; + +static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) +{ + u64 count = 0; + + if (val == 0) + printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); + else + printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + + nfs4_lock_state(); + count = nfsd_for_n_state(val, op->forget); + nfs4_unlock_state(); + printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); +} + +static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, + size_t addr_size) +{ + char buf[INET6_ADDRSTRLEN]; + struct nfs4_client *clp; + u64 count; + + nfs4_lock_state(); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + count = op->forget(clp, 0); + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); + } + nfs4_unlock_state(); +} + +static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) +{ + nfs4_lock_state(); + *val = nfsd_for_n_state(0, op->print); + nfs4_unlock_state(); +} + +static ssize_t fault_inject_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + static u64 val; + char read_buf[25]; + size_t size; + loff_t pos = *ppos; + + if (!pos) + nfsd_inject_get(file_inode(file)->i_private, &val); + size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); + + return simple_read_from_buffer(buf, len, ppos, read_buf, size); +} + +static ssize_t fault_inject_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + char write_buf[INET6_ADDRSTRLEN]; + size_t size = min(sizeof(write_buf) - 1, len); + struct net *net = current->nsproxy->net_ns; + struct sockaddr_storage sa; + u64 val; + + if (copy_from_user(write_buf, buf, size)) + return -EFAULT; + write_buf[size] = '\0'; + + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); + if (size > 0) + nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); + else { + val = simple_strtoll(write_buf, NULL, 0); + nfsd_inject_set(file_inode(file)->i_private, val); + } + return len; /* on success, claim we got the whole input */ +} + +static const struct file_operations fops_nfsd = { + .owner = THIS_MODULE, + .read = fault_inject_read, + .write = fault_inject_write, +}; + +void nfsd_fault_inject_cleanup(void) +{ + debugfs_remove_recursive(debug_dir); +} + +int nfsd_fault_inject_init(void) +{ + unsigned int i; + struct nfsd_fault_inject_op *op; + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + + debug_dir = debugfs_create_dir("nfsd", NULL); + if (!debug_dir) + goto fail; + + for (i = 0; i < NUM_INJECT_OPS; i++) { + op = &inject_ops[i]; + if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) + goto fail; + } + return 0; + +fail: + nfsd_fault_inject_cleanup(); + return -ENOMEM; +} diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h new file mode 100644 index 00000000000..a3f34900091 --- /dev/null +++ b/fs/nfsd/idmap.h @@ -0,0 +1,62 @@ +/* + * Mapping of UID to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. +> * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LINUX_NFSD_IDMAP_H +#define LINUX_NFSD_IDMAP_H + +#include <linux/in.h> +#include <linux/sunrpc/svc.h> + +/* XXX from linux/nfs_idmap.h */ +#define IDMAP_NAMESZ 128 + +#ifdef CONFIG_NFSD_V4 +int nfsd_idmap_init(struct net *); +void nfsd_idmap_shutdown(struct net *); +#else +static inline int nfsd_idmap_init(struct net *net) +{ + return 0; +} +static inline void nfsd_idmap_shutdown(struct net *net) +{ +} +#endif + +__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *); +__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *); +__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t); +__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t); + +#endif /* LINUX_NFSD_IDMAP_H */ diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 9e4a568a501..77e7a5cca88 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/lockd.c - * * This file contains all the stubs needed when communicating with lockd. * This level of indirection is necessary so we can run nfsd+lockd without * requiring the nfs client to be compiled in/loaded, and vice versa. @@ -8,17 +6,20 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/types.h> -#include <linux/fs.h> #include <linux/file.h> -#include <linux/mount.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> #include <linux/lockd/bind.h> +#include "nfsd.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_LOCKD +#ifdef CONFIG_LOCKD_V4 +#define nlm_stale_fh nlm4_stale_fh +#define nlm_failed nlm4_failed +#else +#define nlm_stale_fh nlm_lck_denied_nolocks +#define nlm_failed nlm_lck_denied_nolocks +#endif /* * Note: we hold the dentry use count while the file is open. */ @@ -34,11 +35,8 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); fh.fh_export = NULL; - exp_readlock(); - nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp); + nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); - rqstp->rq_client = NULL; - exp_readunlock(); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. */ @@ -47,12 +45,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) return 0; case nfserr_dropit: return nlm_drop_reply; -#ifdef CONFIG_LOCKD_V4 case nfserr_stale: - return nlm4_stale_fh; -#endif + return nlm_stale_fh; default: - return nlm_lck_denied; + return nlm_failed; } } @@ -65,7 +61,6 @@ nlm_fclose(struct file *filp) static struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ - .get_grace_period = get_nfs4_grace_period, }; void diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h new file mode 100644 index 00000000000..d32b3aa6600 --- /dev/null +++ b/fs/nfsd/netns.h @@ -0,0 +1,112 @@ +/* + * per net namespace data structures for nfsd + * + * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __NFSD_NETNS_H__ +#define __NFSD_NETNS_H__ + +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define LOCKOWNER_INO_HASH_BITS 8 +#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) + +#define SESSION_HASH_SIZE 512 + +struct cld_net; +struct nfsd4_client_tracking_ops; + +struct nfsd_net { + struct cld_net *cld_net; + + struct cache_detail *svc_expkey_cache; + struct cache_detail *svc_export_cache; + + struct cache_detail *idtoname_cache; + struct cache_detail *nametoid_cache; + + struct lock_manager nfsd4_manager; + bool grace_ended; + time_t boot_time; + + /* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_name_tree hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_name_tree hold unconfirmed + * setclientid info. + */ + struct list_head *reclaim_str_hashtbl; + int reclaim_str_hashtbl_size; + struct list_head *conf_id_hashtbl; + struct rb_root conf_name_tree; + struct list_head *unconf_id_hashtbl; + struct rb_root unconf_name_tree; + struct list_head *ownerstr_hashtbl; + struct list_head *lockowner_ino_hashtbl; + struct list_head *sessionid_hashtbl; + /* + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. + * + * All of the above fields are protected by the client_mutex. + */ + struct list_head client_lru; + struct list_head close_lru; + struct list_head del_recall_lru; + + struct delayed_work laundromat_work; + + /* client_lock protects the client lru list and session hash table */ + spinlock_t client_lock; + + struct file *rec_file; + bool in_grace; + struct nfsd4_client_tracking_ops *client_tracking_ops; + + time_t nfsd4_lease; + time_t nfsd4_grace; + + bool nfsd_net_up; + bool lockd_up; + + /* + * Time of server startup + */ + struct timeval nfssvc_boot; + + struct svc_serv *nfsd_serv; +}; + +/* Simple check to find out if a given net was properly initialized */ +#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) + +extern int nfsd_net_id; +#endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1c3b7654e96..12b023a7ab7 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -1,19 +1,16 @@ /* - * linux/fs/nfsd/nfs2acl.c - * * Process version 2 NFSACL requests. * * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> */ -#include <linux/sunrpc/svc.h> -#include <linux/nfs.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/xdr3.h> -#include <linux/posix_acl.h> +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ #include <linux/nfsacl.h> +#include <linux/gfp.h> +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC #define RETURN_STATUS(st) { resp->status = (st); return (st); } @@ -33,36 +30,36 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; + nfserr = fh_getattr(fh, &resp->stat); + if (nfserr) + goto fail; + if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -70,17 +67,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -101,28 +91,51 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); - - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; + + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + if (error) + goto out_drop_write; + + fh_drop_write(fh); + + nfserr = fh_getattr(fh, &resp->stat); + +out: /* argp->acl_{access,default} may have been allocated in nfssvc_decode_setaclargs. */ posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); return nfserr; +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); + goto out; } /* @@ -131,10 +144,15 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) { + __be32 nfserr; dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); + return nfserr; } /* @@ -152,6 +170,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + if (nfserr) + return nfserr; + nfserr = fh_getattr(&resp->fh, &resp->stat); return nfserr; } @@ -161,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p); p++; @@ -176,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->mask = ntohl(*p++); if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -197,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -205,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { - if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) return 0; argp->access = ntohl(*p++); @@ -216,6 +241,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, * XDR encode functions */ +/* + * There must be an encoding function for void results so svc_process + * will work properly. + */ +static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + /* GETACL */ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclres *resp) @@ -236,7 +270,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, return 0; inode = dentry->d_inode; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->mask); if (!xdr_ressize_check(rqstp, p)) return 0; @@ -246,7 +280,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } @@ -267,7 +301,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_attrstat *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -275,7 +309,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessres *resp) { - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->access); return xdr_ressize_check(rqstp, p); } @@ -307,7 +341,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p, } #define nfsaclsvc_decode_voidargs NULL -#define nfsaclsvc_encode_voidres NULL #define nfsaclsvc_release_void NULL #define nfsd3_fhandleargs nfsd_fhandle #define nfsd3_attrstatres nfsd_attrstat @@ -345,5 +378,5 @@ struct svc_version nfsd_acl_version2 = { .vs_proc = nfsd_acl_procedures2, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 1, + .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index b647f2f872d..2a514e21dc7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -1,18 +1,16 @@ /* - * linux/fs/nfsd/nfs3acl.c - * * Process version 3 NFSACL requests. * * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de> */ -#include <linux/sunrpc/svc.h> -#include <linux/nfs3.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr3.h> -#include <linux/posix_acl.h> +#include "nfsd.h" +/* FIXME: nfsacl.h is a broken header */ #include <linux/nfsacl.h> +#include <linux/gfp.h> +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" #define RETURN_STATUS(st) { resp->status = (st); return (st); } @@ -31,34 +29,30 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { - svc_fh *fh; struct posix_acl *acl; + struct inode *inode; + svc_fh *fh; __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); - if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (nfserr) RETURN_STATUS(nfserr); + inode = fh->fh_dentry->d_inode; + if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) RETURN_STATUS(nfserr_inval); resp->mask = argp->mask; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { - acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); + acl = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ - - struct inode *inode = fh->fh_dentry->d_inode; acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } resp->acl_access = acl; @@ -66,17 +60,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { /* Check how Solaris handles requests for the Default ACL of a non-directory! */ - - acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - int err = PTR_ERR(acl); - - if (err == -ENODATA || err == -EOPNOTSUPP) - acl = NULL; - else { - nfserr = nfserrno(err); - goto fail; - } + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; } resp->acl_default = acl; } @@ -97,21 +84,37 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd3_attrstat *resp) { + struct inode *inode; svc_fh *fh; __be32 nfserr = 0; + int error; fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); - - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_ACCESS, argp->acl_access) ); - } - if (!nfserr) { - nfserr = nfserrno( nfsd_set_posix_acl( - fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (nfserr) + goto out; + + inode = fh->fh_dentry->d_inode; + if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) { + error = -EOPNOTSUPP; + goto out_errno; } + error = fh_want_write(fh); + if (error) + goto out_errno; + + error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS); + if (error) + goto out_drop_write; + error = inode->i_op->set_acl(inode, argp->acl_default, + ACL_TYPE_DEFAULT); + +out_drop_write: + fh_drop_write(fh); +out_errno: + nfserr = nfserrno(error); +out: /* argp->acl_{access,default} may have been allocated in nfs3svc_decode_setaclargs. */ posix_acl_release(argp->acl_access); @@ -125,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p); p++; @@ -140,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, unsigned int base; int n; - if (!(p = nfs3svc_decode_fh(p, &args->fh))) + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) return 0; args->mask = ntohl(*p++); if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || @@ -185,7 +190,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { - if (!rqstp->rq_respages[rqstp->rq_resused++]) + if (!*(rqstp->rq_next_page++)) return 0; w -= PAGE_SIZE; } @@ -263,6 +268,6 @@ struct svc_version nfsd_acl_version3 = { .vs_proc = nfsd_acl_procedures3, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 1, + .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index c721a1e6e9d..40128991313 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -1,29 +1,16 @@ /* - * linux/fs/nfsd/nfs3proc.c - * * Process version 3 NFS requests. * * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/linkage.h> -#include <linux/time.h> -#include <linux/errno.h> #include <linux/fs.h> #include <linux/ext2_fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/in.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/major.h> - -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr3.h> -#include <linux/nfs3.h> +#include <linux/magic.h> + +#include "cache.h" +#include "xdr3.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -56,20 +43,18 @@ static __be32 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int err; __be32 nfserr; dprintk("nfsd: GETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); if (nfserr) RETURN_STATUS(nfserr); - err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, &resp->stat); - nfserr = nfserrno(err); + nfserr = fh_getattr(&resp->fh, &resp->stat); RETURN_STATUS(nfserr); } @@ -163,10 +148,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, __be32 nfserr; u32 max_blocksize = svc_max_payload(rqstp); - dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", + dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), (unsigned long) argp->count, - (unsigned long) argp->offset); + (unsigned long long) argp->offset); /* Obtain buffer pointer for payload. * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) @@ -180,7 +165,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_read(rqstp, &resp->fh, NULL, + nfserr = nfsd_read(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, argp->vlen, &resp->count); @@ -201,11 +186,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, struct nfsd3_writeres *resp) { __be32 nfserr; + unsigned long cnt = argp->len; - dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", + dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), argp->len, - (unsigned long) argp->offset, + (unsigned long long) argp->offset, argp->stable? " stable" : ""); fh_copy(&resp->fh, &argp->fh); @@ -213,9 +199,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, nfserr = nfsd_write(rqstp, &resp->fh, NULL, argp->offset, rqstp->rq_vec, argp->vlen, - argp->len, + &cnt, &resp->committed); - resp->count = argp->count; + resp->count = cnt; RETURN_STATUS(nfserr); } @@ -242,7 +228,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, attr = &argp->attrs; /* Get the directory inode */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE); + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE); if (nfserr) RETURN_STATUS(nfserr); @@ -256,9 +242,9 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, } /* Now create the file and set attributes */ - nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, - argp->createmode, argp->verf, NULL, NULL); + argp->createmode, (u32 *)argp->verf, NULL, NULL); RETURN_STATUS(nfserr); } @@ -282,7 +268,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &argp->attrs, S_IFDIR, 0, &resp->fh); - + fh_unlock(&resp->dirfh); RETURN_STATUS(nfserr); } @@ -338,7 +324,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, type = nfs3_ftypes[argp->ftype]; nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &argp->attrs, type, rdev, &resp->fh); - + fh_unlock(&resp->dirfh); RETURN_STATUS(nfserr); } @@ -359,6 +345,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); RETURN_STATUS(nfserr); } @@ -378,6 +365,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, fh_copy(&resp->fh, &argp->fh); nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); RETURN_STATUS(nfserr); } @@ -469,7 +457,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, __be32 nfserr; int count = 0; loff_t offset; - int i; + struct page **p; caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", @@ -493,8 +481,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - for (i=1; i<rqstp->rq_resused ; i++) { - page_addr = page_address(rqstp->rq_respages[i]); + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); if (((caddr_t)resp->buffer >= page_addr) && ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { @@ -530,7 +518,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: FSSTAT(3) %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); RETURN_STATUS(nfserr); } @@ -558,7 +546,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->f_maxfilesize = ~(u32) 0; resp->f_properties = NFS3_FSF_DEFAULT; - nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); /* Check special features of the file system. May request * different read/write sizes for file systems known to have @@ -567,7 +556,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; /* Note that we don't care for remote fs's here */ - if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { + if (sb->s_magic == MSDOS_SUPER_MAGIC) { resp->f_properties = NFS3_FSF_BILLYBOY; } resp->f_maxfilesize = sb->s_maxbytes; @@ -597,7 +586,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->p_case_insensitive = 0; resp->p_case_preserving = 1; - nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); if (nfserr == 0) { struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; @@ -608,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->p_link_max = EXT2_LINK_MAX; resp->p_name_max = EXT2_NAME_LEN; break; - case 0x4d44: /* MSDOS_SUPER_MAGIC */ + case MSDOS_SUPER_MAGIC: resp->p_case_insensitive = 1; resp->p_case_preserving = 0; break; @@ -648,8 +637,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, * NFSv3 Server procedures. * Only the results of non-idempotent operations are cached. */ -#define nfs3svc_decode_voidargs NULL -#define nfs3svc_release_void NULL #define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat @@ -682,28 +669,219 @@ struct nfsd3_voidargs { int dummy; }; #define WC (7+pAT) /* WCC attributes */ static struct svc_procedure nfsd_procedures3[22] = { - PROC(null, void, void, void, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattr, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), - PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), - PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), - PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4), - PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), - PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(symlink, symlink, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rename, rename, rename, fhandle2, RC_REPLBUFF, ST+WC+WC), - PROC(link, link, link, fhandle2, RC_REPLBUFF, ST+pAT+WC), - PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE, 0), - PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE, 0), - PROC(fsstat, fhandle, fsstat, void, RC_NOCACHE, ST+pAT+2*6+1), - PROC(fsinfo, fhandle, fsinfo, void, RC_NOCACHE, ST+pAT+12), - PROC(pathconf, fhandle, pathconf, void, RC_NOCACHE, ST+pAT+6), - PROC(commit, commit, commit, fhandle, RC_NOCACHE, ST+WC+2), + [NFS3PROC_NULL] = { + .pc_func = (svc_procfunc) nfsd3_proc_null, + .pc_encode = (kxdrproc_t) nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFS3PROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_getattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_attrstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFS3PROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_setattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd3_proc_lookup, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+pAT+pAT, + }, + [NFS3PROC_ACCESS] = { + .pc_func = (svc_procfunc) nfsd3_proc_access, + .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_ressize = sizeof(struct nfsd3_accessres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1, + }, + [NFS3PROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_readlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readlinkargs), + .pc_ressize = sizeof(struct nfsd3_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, + }, + [NFS3PROC_READ] = { + .pc_func = (svc_procfunc) nfsd3_proc_read, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_ressize = sizeof(struct nfsd3_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, + }, + [NFS3PROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd3_proc_write, + .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_ressize = sizeof(struct nfsd3_writeres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+4, + }, + [NFS3PROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd3_proc_create, + .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_mkdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_symlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKNOD] = { + .pc_func = (svc_procfunc) nfsd3_proc_mknod, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd3_proc_remove, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_rmdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd3_proc_rename, + .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_ressize = sizeof(struct nfsd3_renameres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+WC, + }, + [NFS3PROC_LINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_link, + .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_ressize = sizeof(struct nfsd3_linkres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+pAT+WC, + }, + [NFS3PROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_READDIRPLUS] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdirplus, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_FSSTAT] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsstat, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+2*6+1, + }, + [NFS3PROC_FSINFO] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsinfo, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsinfores), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+12, + }, + [NFS3PROC_PATHCONF] = { + .pc_func = (svc_procfunc) nfsd3_proc_pathconf, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_pathconfres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+6, + }, + [NFS3PROC_COMMIT] = { + .pc_func = (svc_procfunc) nfsd3_proc_commit, + .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_ressize = sizeof(struct nfsd3_commitres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+WC+2, + }, }; struct svc_version nfsd_version3 = { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 17d0dd99720..e6c01e80325 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/nfs3xdr.c - * * XDR support for nfsd/protocol version 3. * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> @@ -8,20 +6,12 @@ * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! */ -#include <linux/types.h> -#include <linux/time.h> -#include <linux/nfs3.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/dcache.h> #include <linux/namei.h> -#include <linux/mm.h> -#include <linux/vfs.h> -#include <linux/sunrpc/xdr.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/xdr3.h> +#include <linux/sunrpc/svc_xprt.h> +#include "xdr3.h" #include "auth.h" +#include "netns.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -116,12 +106,14 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_mode = ntohl(*p++); } if (*p++) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++)); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if (*p++) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++)); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if (*p++) { u64 newsize; @@ -176,10 +168,10 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { @@ -215,10 +207,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode) { - int err; + __be32 err; struct kstat stat; - err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); + err = fh_getattr(fhp, &stat); if (!err) { *p++ = xdr_one; /* attributes follow */ lease_get_mtime(dentry->d_inode, &stat.mtime); @@ -265,16 +257,18 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) */ void fill_post_wcc(struct svc_fh *fhp) { - int err; + __be32 err; if (fhp->fh_post_saved) printk("nfsd: inode locked twice during operation.\n"); - err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, - &fhp->fh_post_attr); - if (err) + err = fh_getattr(fhp, &fhp->fh_post_attr); + fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; + if (err) { fhp->fh_post_saved = 0; - else + /* Grab the ctime anyway - set_change_info might use it */ + fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime; + } else fhp->fh_post_saved = 1; } @@ -284,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp) int nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -293,7 +288,8 @@ int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = decode_sattr3(p, &args->attrs); @@ -321,7 +317,8 @@ int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->access = ntohl(*p++); @@ -333,10 +330,11 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readargs *args) { unsigned int len; - int v,pn; + int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -348,8 +346,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, /* set up the kvec */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -365,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); @@ -471,8 +471,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, len = ntohl(*p++); if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) return 0; - args->tname = new = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->tname = new = page_address(*(rqstp->rq_next_page++)); args->tlen = len; /* first copy and check from the first page */ old = (char*)p; @@ -541,10 +540,10 @@ int nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -565,7 +564,8 @@ int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -575,8 +575,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = - page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -585,10 +584,11 @@ int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { - int len, pn; + int len; u32 max_blocksize = svc_max_payload(rqstp); - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; @@ -600,9 +600,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->count = len; while (len > 0) { - pn = rqstp->rq_resused++; + struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) - args->buffer = page_address(rqstp->rq_respages[pn]); + args->buffer = page_address(p); len -= PAGE_SIZE; } @@ -613,7 +613,8 @@ int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); @@ -712,7 +713,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p, *p++ = htonl(resp->eof); *p++ = htonl(resp->count); /* xdr opaque count */ xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data aswell */ + /* now update rqstp->rq_res to reflect data as well */ rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ @@ -730,12 +731,14 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } @@ -813,51 +816,59 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, return p; } -static __be32 * -encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, - struct svc_fh *fhp) -{ - p = encode_post_op_attr(cd->rqstp, p, fhp); - *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, fhp); - fh_put(fhp); - return p; -} - -static int +static __be32 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, const char *name, int namlen) { struct svc_export *exp; struct dentry *dparent, *dchild; - int rv = 0; + __be32 rv = nfserr_noent; dparent = cd->fh.fh_dentry; exp = cd->fh.fh_export; - fh_init(fhp, NFS3_FHSIZE); if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - if (dchild == dparent) { - /* filesystem root - cannot return filehandle for ".." */ - dput(dchild); - return 1; - } + /* filesystem root - cannot return filehandle for ".." */ + if (dchild == dparent) + goto out; } else dchild = dget(dparent); } else dchild = lookup_one_len(name, dparent, namlen); if (IS_ERR(dchild)) - return 1; - if (d_mountpoint(dchild) || - fh_compose(fhp, exp, dchild, &cd->fh) != 0 || - !dchild->d_inode) - rv = 1; + return rv; + if (d_mountpoint(dchild)) + goto out; + if (!dchild->d_inode) + goto out; + rv = fh_compose(fhp, exp, dchild, &cd->fh); +out: dput(dchild); return rv; } +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +{ + struct svc_fh *fh = &cd->scratch; + __be32 err; + + fh_init(fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, fh, name, namlen); + if (err) { + *p++ = 0; + *p++ = 0; + goto out; + } + p = encode_post_op_attr(cd->rqstp, p, fh); + *p++ = xdr_one; /* yes, a file handle follows */ + p = encode_fh(p, fh); +out: + fh_put(fh); + return p; +} + /* * Encode a directory entry. This one works for both normal readdir * and readdirplus. @@ -878,7 +889,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, common); __be32 *p = cd->buffer; caddr_t curr_page_addr = NULL; - int pn; /* current page number */ + struct page ** page; int slen; /* string (name) length */ int elen; /* estimated entry length in words */ int num_entry_words = 0; /* actual number of words */ @@ -915,8 +926,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, } /* determine which page in rq_respages[] we are currently filling */ - for (pn=1; pn < cd->rqstp->rq_resused; pn++) { - curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + for (page = cd->rqstp->rq_respages + 1; + page < cd->rqstp->rq_next_page; page++) { + curr_page_addr = page_address(*page); if (((caddr_t)cd->buffer >= curr_page_addr) && ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) @@ -928,39 +940,22 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, p = encode_entry_baggage(cd, p, name, namlen, ino); - /* throw in readdirplus baggage */ - if (plus) { - struct svc_fh fh; - - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { - *p++ = 0; - *p++ = 0; - } else - p = encode_entryplus_baggage(cd, p, &fh); - } + if (plus) + p = encode_entryplus_baggage(cd, p, name, namlen); num_entry_words = p - cd->buffer; - } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + } else if (*(page+1) != NULL) { /* temporarily encode entry into next page, then move back to * current and next page in rq_respages[] */ __be32 *p1, *tmp; int len1, len2; /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + p1 = tmp = page_address(*(page+1)); p1 = encode_entry_baggage(cd, p1, name, namlen, ino); - /* throw in readdirplus baggage */ - if (plus) { - struct svc_fh fh; - - if (compose_entry_fh(cd, &fh, name, namlen) > 0) { - /* zero out the filehandle */ - *p1++ = 0; - *p1++ = 0; - } else - p1 = encode_entryplus_baggage(cd, p1, &fh); - } + if (plus) + p1 = encode_entryplus_baggage(cd, p1, name, namlen); /* determine entry word length and lengths to go in pages */ num_entry_words = p1 - tmp; @@ -1101,11 +1096,13 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitres *resp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nfssvc_boot.tv_sec); - *p++ = htonl(nfssvc_boot.tv_usec); + *p++ = htonl(nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_usec); } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab..d714156a19f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -1,6 +1,4 @@ /* - * fs/nfs4acl/acl.c - * * Common NFSv4 ACL handling code. * * Copyright (c) 2002, 2003 The Regents of the University of Michigan. @@ -36,17 +34,16 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/string.h> #include <linux/slab.h> -#include <linux/list.h> -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/module.h> #include <linux/nfs_fs.h> -#include <linux/posix_acl.h> -#include <linux/nfs4.h> -#include <linux/nfs4_acl.h> +#include "nfsfh.h" +#include "nfsd.h" +#include "acl.h" +#include "vfs.h" +#define NFS4_ACL_TYPE_DEFAULT 0x01 +#define NFS4_ACL_DIR 0x02 +#define NFS4_ACL_OWNER 0x04 /* mode bit translations: */ #define NFS4_READ_MODE (NFS4_ACE_READ_DATA) @@ -138,36 +135,47 @@ static short ace2type(struct nfs4_ace *); static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); -struct nfs4_acl * -nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, - unsigned int flags) +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_acl **acl) { - struct nfs4_acl *acl; + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; int size = 0; - if (pacl) { - if (posix_acl_valid(pacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*pacl->a_count; + pacl = get_acl(inode, ACL_TYPE_ACCESS); + if (!pacl) { + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) + return PTR_ERR(pacl); } - if (dpacl) { - if (posix_acl_valid(dpacl) < 0) - return ERR_PTR(-EINVAL); - size += 2*dpacl->a_count; + /* allocate for worst case: one (deny, allow) pair each: */ + size += 2 * pacl->a_count; + + if (S_ISDIR(inode->i_mode)) { + flags = NFS4_ACL_DIR; + dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (dpacl) + size += 2 * dpacl->a_count; } - /* Allocate for worst case: one (deny, allow) pair each: */ - acl = nfs4_acl_new(size); - if (acl == NULL) - return ERR_PTR(-ENOMEM); + *acl = nfs4_acl_new(size); + if (*acl == NULL) { + error = -ENOMEM; + goto out; + } - if (pacl) - _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); if (dpacl) - _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT); + _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - return acl; + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; } struct posix_acl_summary { @@ -272,7 +280,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->flag = eflag; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; } @@ -281,7 +289,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_uid = pa->e_uid; ace++; acl->naces++; pa++; @@ -308,7 +316,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; pa++; @@ -321,7 +329,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, deny = ~pas.group & pas.other; if (deny) { ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; - ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; + ace->flag = eflag; ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_GROUP; ace++; @@ -335,9 +343,9 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, if (deny) { ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE; ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; - ace->access_mask = mask_from_posix(deny, flags); + ace->access_mask = deny_mask_from_posix(deny, flags); ace->whotype = NFS4_ACL_WHO_NAMED; - ace->who = pa->e_id; + ace->who_gid = pa->e_gid; ace++; acl->naces++; } @@ -353,6 +361,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, acl->naces++; } +static bool +pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2) +{ + if (pace1->e_tag != pace2->e_tag) + return pace1->e_tag > pace2->e_tag; + if (pace1->e_tag == ACL_USER) + return uid_gt(pace1->e_uid, pace2->e_uid); + if (pace1->e_tag == ACL_GROUP) + return gid_gt(pace1->e_gid, pace2->e_gid); + return false; +} + static void sort_pacl_range(struct posix_acl *pacl, int start, int end) { int sorted = 0, i; @@ -363,8 +383,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) { while (!sorted) { sorted = 1; for (i = start; i < end; i++) { - if (pacl->a_entries[i].e_id - > pacl->a_entries[i+1].e_id) { + if (pace_gt(&pacl->a_entries[i], + &pacl->a_entries[i+1])) { sorted = 0; tmp = pacl->a_entries[i]; pacl->a_entries[i] = pacl->a_entries[i+1]; @@ -381,15 +401,17 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; sort_pacl_range(pacl, 1, i-1); BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); - j = i++; + j = ++i; while (pacl->a_entries[j].e_tag == ACL_GROUP) j++; sort_pacl_range(pacl, i, j-1); @@ -406,7 +428,10 @@ struct posix_ace_state { }; struct posix_user_ace_state { - uid_t uid; + union { + kuid_t uid; + kgid_t gid; + }; struct posix_ace_state perms; }; @@ -443,7 +468,7 @@ init_state(struct posix_acl_state *state, int cnt) * enough space for either: */ alloc = sizeof(struct posix_ace_state_array) - + cnt*sizeof(struct posix_ace_state); + + cnt*sizeof(struct posix_user_ace_state); state->users = kzalloc(alloc, GFP_KERNEL); if (!state->users) return -ENOMEM; @@ -506,19 +531,21 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all * permissions zero. */ - nace = 4 + state->users->n + state->groups->n; + if (!state->users->n && !state->groups->n) + nace = 3; + else /* Note we also include a MASK ACE in this case: */ + nace = 4 + state->users->n + state->groups->n; pacl = posix_acl_alloc(nace, GFP_KERNEL); if (!pacl) return ERR_PTR(-ENOMEM); @@ -529,7 +556,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; for (i=0; i < state->users->n; i++) { pace++; @@ -539,7 +565,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->users->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->users->aces[i].uid; + pace->e_uid = state->users->aces[i].uid; add_to_mask(state, &state->users->aces[i].perms); } @@ -549,7 +575,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; add_to_mask(state, &state->group); for (i=0; i < state->groups->n; i++) { @@ -560,14 +585,15 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) goto out_err; low_mode_from_nfs4(state->groups->aces[i].perms.allow, &pace->e_perm, flags); - pace->e_id = state->groups->aces[i].uid; + pace->e_gid = state->groups->aces[i].gid; add_to_mask(state, &state->groups->aces[i].perms); } - pace++; - pace->e_tag = ACL_MASK; - low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; + if (state->users->n || state->groups->n) { + pace++; + pace->e_tag = ACL_MASK; + low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); + } pace++; pace->e_tag = ACL_OTHER; @@ -575,7 +601,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) if (error) goto out_err; low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); - pace->e_id = ACL_UNDEFINED_ID; return pacl; out_err: @@ -595,12 +620,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask) astate->deny |= mask & ~astate->allow; } -static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) +static int find_uid(struct posix_acl_state *state, kuid_t uid) { + struct posix_ace_state_array *a = state->users; int i; for (i = 0; i < a->n; i++) - if (a->aces[i].uid == uid) + if (uid_eq(a->aces[i].uid, uid)) return i; /* Not found: */ a->n++; @@ -611,6 +637,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array return i; } +static int find_gid(struct posix_acl_state *state, kgid_t gid) +{ + struct posix_ace_state_array *a = state->groups; + int i; + + for (i = 0; i < a->n; i++) + if (gid_eq(a->aces[i].gid, gid)) + return i; + /* Not found: */ + a->n++; + a->aces[i].gid = gid; + a->aces[i].perms.allow = state->everyone.allow; + a->aces[i].perms.deny = state->everyone.deny; + + return i; +} + static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) { int i; @@ -644,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_USER: - i = find_uid(state, state->users, ace->who); + i = find_uid(state, ace->who_uid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->users->aces[i].perms, mask); } else { @@ -666,7 +709,7 @@ static void process_one_v4_ace(struct posix_acl_state *state, } break; case ACL_GROUP: - i = find_uid(state, state->groups, ace->who); + i = find_gid(state, ace->who_gid); if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { allow_bits(&state->groups->aces[i].perms, mask); } else { @@ -698,8 +741,9 @@ static void process_one_v4_ace(struct posix_acl_state *state, } } -int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, - struct posix_acl **dpacl, unsigned int flags) +static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, + struct posix_acl **pacl, struct posix_acl **dpacl, + unsigned int flags) { struct posix_acl_state effective_acl_state, default_acl_state; struct nfs4_ace *ace; @@ -759,6 +803,57 @@ out_estate: return ret; } +__be32 +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + __be32 error; + int host_error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + if (!inode->i_op->set_acl || !IS_POSIXACL(inode)) + return nfserr_attrnotsupp; + + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; + + host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (host_error == -EINVAL) + return nfserr_attrnotsupp; + if (host_error < 0) + goto out_nfserr; + + host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS); + if (host_error < 0) + goto out_release; + + if (S_ISDIR(inode->i_mode)) { + host_error = inode->i_op->set_acl(inode, dpacl, + ACL_TYPE_DEFAULT); + } + +out_release: + posix_acl_release(pacl); + posix_acl_release(dpacl); +out_nfserr: + if (host_error == -EOPNOTSUPP) + return nfserr_attrnotsupp; + else + return nfserrno(host_error); +} + + static short ace2type(struct nfs4_ace *ace) { @@ -777,9 +872,6 @@ ace2type(struct nfs4_ace *ace) return -1; } -EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); -EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); - struct nfs4_acl * nfs4_acl_new(int n) { @@ -827,21 +919,21 @@ nfs4_acl_get_whotype(char *p, u32 len) return NFS4_ACL_WHO_NAMED; } -int -nfs4_acl_write_who(int who, char *p) +__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) { + __be32 *p; int i; for (i = 0; i < ARRAY_SIZE(s2t_map); i++) { - if (s2t_map[i].type == who) { - memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); - return s2t_map[i].stringlen; - } + if (s2t_map[i].type != who) + continue; + p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, s2t_map[i].string, + s2t_map[i].stringlen); + return 0; } - BUG(); + WARN_ON_ONCE(1); return -1; } - -EXPORT_SYMBOL(nfs4_acl_new); -EXPORT_SYMBOL(nfs4_acl_get_whotype); -EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index aae2b29ae2c..2c73cae9899 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/nfs4callback.c - * * Copyright (c) 2001 The Regents of the University of Michigan. * All rights reserved. * @@ -33,283 +31,523 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/module.h> -#include <linux/list.h> -#include <linux/inet.h> -#include <linux/errno.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/kthread.h> -#include <linux/sunrpc/xdr.h> -#include <linux/sunrpc/svc.h> #include <linux/sunrpc/clnt.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/state.h> -#include <linux/sunrpc/sched.h> -#include <linux/nfs4.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/svc_xprt.h> +#include <linux/slab.h> +#include "nfsd.h" +#include "state.h" +#include "netns.h" +#include "xdr4cb.h" #define NFSDDBG_FACILITY NFSDDBG_PROC +static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); + #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 -/* declarations */ -static const struct rpc_call_ops nfs4_cb_null_ops; - /* Index of predefined Linux callback client operations */ enum { - NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, + NFSPROC4_CLNT_CB_SEQUENCE, +}; + +struct nfs4_cb_compound_hdr { + /* args */ + u32 ident; /* minorversion 0 only */ + u32 nops; + __be32 *nops_p; + u32 minorversion; + /* res */ + int status; }; +/* + * Handle decode buffer overflows out-of-line. + */ +static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) +{ + dprintk("NFS: %s prematurely hit the end of our receive buffer. " + "Remaining buffer length is %tu words.\n", + func, xdr->end - xdr->p); +} + +static __be32 *xdr_encode_empty_array(__be32 *p) +{ + *p++ = xdr_zero; + return p; +} + +/* + * Encode/decode NFSv4 CB basic data types + * + * Basic NFSv4 callback data types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and section + * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version + * 1 Protocol" + */ + +/* + * nfs_cb_opnum4 + * + * enum nfs_cb_opnum4 { + * OP_CB_GETATTR = 3, + * ... + * }; + */ enum nfs_cb_opnum4 { - OP_CB_RECALL = 4, + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_ILLEGAL = 10044 }; -#define NFS4_MAXTAGLEN 20 +static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op) +{ + __be32 *p; -#define NFS4_enc_cb_null_sz 0 -#define NFS4_dec_cb_null_sz 0 -#define cb_compound_enc_hdr_sz 4 -#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) -#define op_enc_sz 1 -#define op_dec_sz 2 -#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) -#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) -#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ - 1 + enc_stateid_sz + \ - enc_nfs4_fh_sz) + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(op); +} -#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ - op_dec_sz) +/* + * nfs_fh4 + * + * typedef opaque nfs_fh4<NFS4_FHSIZE>; + */ +static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) +{ + u32 length = fh->fh_size; + __be32 *p; + + BUG_ON(length > NFS4_FHSIZE); + p = xdr_reserve_space(xdr, 4 + length); + xdr_encode_opaque(p, &fh->fh_base, length); +} /* -* Generic encode routines from fs/nfs/nfs4xdr.c -*/ -static inline __be32 * -xdr_writemem(__be32 *p, const void *ptr, int nbytes) -{ - int tmp = XDR_QUADLEN(nbytes); - if (!tmp) - return p; - p[tmp-1] = 0; - memcpy(p, ptr, nbytes); - return p + tmp; -} - -#define WRITE32(n) *p++ = htonl(n) -#define WRITEMEM(ptr,nbytes) do { \ - p = xdr_writemem(p, ptr, nbytes); \ -} while (0) -#define RESERVE_SPACE(nbytes) do { \ - p = xdr_reserve_space(xdr, nbytes); \ - if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ - BUG_ON(!p); \ -} while (0) + * stateid4 + * + * struct stateid4 { + * uint32_t seqid; + * opaque other[12]; + * }; + */ +static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(sid->si_generation); + xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE); +} /* - * Generic decode routines from fs/nfs/nfs4xdr.c + * sessionid4 + * + * typedef opaque sessionid4[NFS4_SESSIONID_SIZE]; */ -#define DECODE_TAIL \ - status = 0; \ -out: \ - return status; \ -xdr_error: \ - dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \ - status = -EIO; \ - goto out - -#define READ32(x) (x) = ntohl(*p++) -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) -#define READTIME(x) do { \ - p++; \ - (x.tv_sec) = ntohl(*p++); \ - (x.tv_nsec) = ntohl(*p++); \ -} while (0) -#define READ_BUF(nbytes) do { \ - p = xdr_inline_decode(xdr, nbytes); \ - if (!p) { \ - dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ - __FUNCTION__, __LINE__); \ - return -EIO; \ - } \ -} while (0) +static void encode_sessionid4(struct xdr_stream *xdr, + const struct nfsd4_session *session) +{ + __be32 *p; -struct nfs4_cb_compound_hdr { - int status; - u32 ident; - u32 nops; - u32 taglen; - char * tag; -}; + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); + xdr_encode_opaque_fixed(p, session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); +} -static struct { -int stat; -int errno; +/* + * nfsstat4 + */ +static const struct { + int stat; + int errno; } nfs_cb_errtbl[] = { - { NFS4_OK, 0 }, - { NFS4ERR_PERM, EPERM }, - { NFS4ERR_NOENT, ENOENT }, - { NFS4ERR_IO, EIO }, - { NFS4ERR_NXIO, ENXIO }, - { NFS4ERR_ACCESS, EACCES }, - { NFS4ERR_EXIST, EEXIST }, - { NFS4ERR_XDEV, EXDEV }, - { NFS4ERR_NOTDIR, ENOTDIR }, - { NFS4ERR_ISDIR, EISDIR }, - { NFS4ERR_INVAL, EINVAL }, - { NFS4ERR_FBIG, EFBIG }, - { NFS4ERR_NOSPC, ENOSPC }, - { NFS4ERR_ROFS, EROFS }, - { NFS4ERR_MLINK, EMLINK }, - { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, - { NFS4ERR_NOTEMPTY, ENOTEMPTY }, - { NFS4ERR_DQUOT, EDQUOT }, - { NFS4ERR_STALE, ESTALE }, - { NFS4ERR_BADHANDLE, EBADHANDLE }, - { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, - { NFS4ERR_NOTSUPP, ENOTSUPP }, - { NFS4ERR_TOOSMALL, ETOOSMALL }, - { NFS4ERR_SERVERFAULT, ESERVERFAULT }, - { NFS4ERR_BADTYPE, EBADTYPE }, - { NFS4ERR_LOCKED, EAGAIN }, - { NFS4ERR_RESOURCE, EREMOTEIO }, - { NFS4ERR_SYMLINK, ELOOP }, - { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, - { NFS4ERR_DEADLOCK, EDEADLK }, - { -1, EIO } + { NFS4_OK, 0 }, + { NFS4ERR_PERM, -EPERM }, + { NFS4ERR_NOENT, -ENOENT }, + { NFS4ERR_IO, -EIO }, + { NFS4ERR_NXIO, -ENXIO }, + { NFS4ERR_ACCESS, -EACCES }, + { NFS4ERR_EXIST, -EEXIST }, + { NFS4ERR_XDEV, -EXDEV }, + { NFS4ERR_NOTDIR, -ENOTDIR }, + { NFS4ERR_ISDIR, -EISDIR }, + { NFS4ERR_INVAL, -EINVAL }, + { NFS4ERR_FBIG, -EFBIG }, + { NFS4ERR_NOSPC, -ENOSPC }, + { NFS4ERR_ROFS, -EROFS }, + { NFS4ERR_MLINK, -EMLINK }, + { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, + { NFS4ERR_DQUOT, -EDQUOT }, + { NFS4ERR_STALE, -ESTALE }, + { NFS4ERR_BADHANDLE, -EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, + { NFS4ERR_NOTSUPP, -ENOTSUPP }, + { NFS4ERR_TOOSMALL, -ETOOSMALL }, + { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_BADTYPE, -EBADTYPE }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_RESOURCE, -EREMOTEIO }, + { NFS4ERR_SYMLINK, -ELOOP }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, -EDEADLK }, + { -1, -EIO } }; -static int -nfs_cb_stat_to_errno(int stat) +/* + * If we cannot translate the error, the recovery routines should + * handle it. + * + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ +static int nfs_cb_stat_to_errno(int status) { int i; + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { - if (nfs_cb_errtbl[i].stat == stat) + if (nfs_cb_errtbl[i].stat == status) return nfs_cb_errtbl[i].errno; } - /* If we cannot translate the error, the recovery routines should - * handle it. - * Note: remaining NFSv4 error codes have values > 10000, so should - * not conflict with native Linux error codes. - */ - return stat; + + dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status); + return -status; } -/* - * XDR encode - */ +static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, + enum nfsstat4 *status) +{ + __be32 *p; + u32 op; + + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + op = be32_to_cpup(p++); + if (unlikely(op != expected)) + goto out_unexpected; + *status = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +out_unexpected: + dprintk("NFSD: Callback server returned operation %d but " + "we issued a request for %d\n", op, expected); + return -EIO; +} -static int -encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) +/* + * CB_COMPOUND4args + * + * struct CB_COMPOUND4args { + * utf8str_cs tag; + * uint32_t minorversion; + * uint32_t callback_ident; + * nfs_cb_argop4 argarray<>; + * }; +*/ +static void encode_cb_compound4args(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr) { __be32 * p; - RESERVE_SPACE(16); - WRITE32(0); /* tag length is always 0 */ - WRITE32(NFS4_MINOR_VERSION); - WRITE32(hdr->ident); - WRITE32(hdr->nops); - return 0; + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4); + p = xdr_encode_empty_array(p); /* empty tag */ + *p++ = cpu_to_be32(hdr->minorversion); + *p++ = cpu_to_be32(hdr->ident); + + hdr->nops_p = p; + *p = cpu_to_be32(hdr->nops); /* argarray element count */ } -static int -encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +/* + * Update argarray element count + */ +static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) { - __be32 *p; - int len = cb_rec->cbr_fhlen; - - RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); - WRITE32(OP_CB_RECALL); - WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); - WRITE32(cb_rec->cbr_trunc); - WRITE32(len); - WRITEMEM(cb_rec->cbr_fhval, len); - return 0; + BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS); + *hdr->nops_p = cpu_to_be32(hdr->nops); } -static int -nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) +/* + * CB_COMPOUND4res + * + * struct CB_COMPOUND4res { + * nfsstat4 status; + * utf8str_cs tag; + * nfs_cb_resop4 resarray<>; + * }; + */ +static int decode_cb_compound4res(struct xdr_stream *xdr, + struct nfs4_cb_compound_hdr *hdr) { - struct xdr_stream xdrs, *xdr = &xdrs; + u32 length; + __be32 *p; - xdr_init_encode(&xdrs, &req->rq_snd_buf, p); - RESERVE_SPACE(0); + p = xdr_inline_decode(xdr, 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + hdr->status = be32_to_cpup(p++); + /* Ignore the tag */ + length = be32_to_cpup(p++); + p = xdr_inline_decode(xdr, length + 4); + if (unlikely(p == NULL)) + goto out_overflow; + hdr->nops = be32_to_cpup(p); return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; } -static int -nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args) +/* + * CB_RECALL4args + * + * struct CB_RECALL4args { + * stateid4 stateid; + * bool truncate; + * nfs_fh4 fh; + * }; + */ +static void encode_cb_recall4args(struct xdr_stream *xdr, + const struct nfs4_delegation *dp, + struct nfs4_cb_compound_hdr *hdr) { - struct xdr_stream xdr; - struct nfs4_cb_compound_hdr hdr = { - .ident = args->cbr_ident, - .nops = 1, - }; + __be32 *p; + + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); + encode_stateid4(xdr, &dp->dl_stid.sc_stateid); + + p = xdr_reserve_space(xdr, 4); + *p++ = xdr_zero; /* truncate */ - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_cb_compound_hdr(&xdr, &hdr); - return (encode_cb_recall(&xdr, args)); + encode_nfs_fh4(xdr, &dp->dl_fh); + + hdr->nops++; } +/* + * CB_SEQUENCE4args + * + * struct CB_SEQUENCE4args { + * sessionid4 csa_sessionid; + * sequenceid4 csa_sequenceid; + * slotid4 csa_slotid; + * slotid4 csa_highest_slotid; + * bool csa_cachethis; + * referring_call_list4 csa_referring_call_lists<>; + * }; + */ +static void encode_cb_sequence4args(struct xdr_stream *xdr, + const struct nfsd4_callback *cb, + struct nfs4_cb_compound_hdr *hdr) +{ + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + __be32 *p; + + if (hdr->minorversion == 0) + return; -static int -decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ - __be32 *p; + encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE); + encode_sessionid4(xdr, session); - READ_BUF(8); - READ32(hdr->status); - READ32(hdr->taglen); - READ_BUF(hdr->taglen + 4); - hdr->tag = (char *)p; - p += XDR_QUADLEN(hdr->taglen); - READ32(hdr->nops); - return 0; + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4); + *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */ + *p++ = xdr_zero; /* csa_slotid */ + *p++ = xdr_zero; /* csa_highest_slotid */ + *p++ = xdr_zero; /* csa_cachethis */ + xdr_encode_empty_array(p); /* csa_referring_call_lists */ + + hdr->nops++; } -static int -decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +/* + * CB_SEQUENCE4resok + * + * struct CB_SEQUENCE4resok { + * sessionid4 csr_sessionid; + * sequenceid4 csr_sequenceid; + * slotid4 csr_slotid; + * slotid4 csr_highest_slotid; + * slotid4 csr_target_highest_slotid; + * }; + * + * union CB_SEQUENCE4res switch (nfsstat4 csr_status) { + * case NFS4_OK: + * CB_SEQUENCE4resok csr_resok4; + * default: + * void; + * }; + * + * Our current back channel implmentation supports a single backchannel + * with a single slot. + */ +static int decode_cb_sequence4resok(struct xdr_stream *xdr, + struct nfsd4_callback *cb) { + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + struct nfs4_sessionid id; + int status; __be32 *p; - u32 op; - int32_t nfserr; - - READ_BUF(8); - READ32(op); - if (op != expected) { - dprintk("NFSD: decode_cb_op_hdr: Callback server returned " - " operation %d but we issued a request for %d\n", - op, expected); - return -EIO; + u32 dummy; + + status = -ESERVERFAULT; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. + */ + p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4); + if (unlikely(p == NULL)) + goto out_overflow; + memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); + if (memcmp(id.data, session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN) != 0) { + dprintk("NFS: %s Invalid session id\n", __func__); + goto out; } - READ32(nfserr); - if (nfserr != NFS_OK) - return -nfs_cb_stat_to_errno(nfserr); - return 0; + p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); + + dummy = be32_to_cpup(p++); + if (dummy != session->se_cb_seq_nr) { + dprintk("NFS: %s Invalid sequence number\n", __func__); + goto out; + } + + dummy = be32_to_cpup(p++); + if (dummy != 0) { + dprintk("NFS: %s Invalid slotid\n", __func__); + goto out; + } + + /* + * FIXME: process highest slotid and target highest slotid + */ + status = 0; +out: + if (status) + nfsd4_mark_cb_fault(cb->cb_clp, status); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_cb_sequence4res(struct xdr_stream *xdr, + struct nfsd4_callback *cb) +{ + enum nfsstat4 nfserr; + int status; + + if (cb->cb_minorversion == 0) + return 0; + + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + goto out_default; + status = decode_cb_sequence4resok(xdr, cb); +out: + return status; +out_default: + return nfs_cb_stat_to_errno(nfserr); +} + +/* + * NFSv4.0 and NFSv4.1 XDR encode functions + * + * NFSv4.0 callback argument types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and section 20 + * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1 + * Protocol". + */ + +/* + * NB: Without this zero space reservation, callbacks over krb5p fail + */ +static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, + void *__unused) +{ + xdr_reserve_space(xdr, 0); } -static int -nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) +/* + * 20.2. Operation 4: CB_RECALL - Recall a Delegation + */ +static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, + const struct nfsd4_callback *cb) +{ + const struct nfs4_delegation *args = cb->cb_op; + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_recall4args(xdr, args, &hdr); + encode_cb_nops(&hdr); +} + + +/* + * NFSv4.0 and NFSv4.1 XDR decode functions + * + * NFSv4.0 callback result types are defined in section 15 of RFC + * 3530: "Network File System (NFS) version 4 Protocol" and section 20 + * of RFC 5661: "Network File System (NFS) Version 4 Minor Version 1 + * Protocol". + */ + +static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, + void *__unused) { return 0; } -static int -nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p) +/* + * 20.2. Operation 4: CB_RECALL - Recall a Delegation + */ +static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfsd4_callback *cb) { - struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr; + enum nfsstat4 nfserr; int status; - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_cb_compound_hdr(&xdr, &hdr); - if (status) + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) goto out; - status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); + + if (cb != NULL) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status)) + goto out; + } + + status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); + if (unlikely(status)) + goto out; + if (unlikely(nfserr != NFS4_OK)) + status = nfs_cb_stat_to_errno(nfserr); out: return status; } @@ -317,172 +555,503 @@ out: /* * RPC procedure tables */ -#define PROC(proc, call, argtype, restype) \ -[NFSPROC4_CLNT_##proc] = { \ - .p_proc = NFSPROC4_CB_##call, \ - .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ - .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ - .p_arglen = NFS4_##argtype##_sz, \ - .p_replen = NFS4_##restype##_sz, \ - .p_statidx = NFSPROC4_CB_##call, \ - .p_name = #proc, \ -} - -static struct rpc_procinfo nfs4_cb_procedures[] = { - PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), - PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), +#define PROC(proc, call, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_CB_##call, \ + .p_encode = (kxdreproc_t)nfs4_xdr_enc_##argtype, \ + .p_decode = (kxdrdproc_t)nfs4_xdr_dec_##restype, \ + .p_arglen = NFS4_enc_##argtype##_sz, \ + .p_replen = NFS4_dec_##restype##_sz, \ + .p_statidx = NFSPROC4_CB_##call, \ + .p_name = #proc, \ +} + +static struct rpc_procinfo nfs4_cb_procedures[] = { + PROC(CB_NULL, NULL, cb_null, cb_null), + PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), }; -static struct rpc_version nfs_cb_version4 = { - .number = 1, - .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), - .procs = nfs4_cb_procedures +static struct rpc_version nfs_cb_version4 = { +/* + * Note on the callback rpc program version number: despite language in rfc + * 5661 section 18.36.3 requiring servers to use 4 in this field, the + * official xdr descriptions for both 4.0 and 4.1 specify version 1, and + * in practice that appears to be what implementations use. The section + * 18.36.3 language is expected to be fixed in an erratum. + */ + .number = 1, + .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), + .procs = nfs4_cb_procedures }; -static struct rpc_version * nfs_cb_version[] = { - NULL, +static const struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -/* Reference counting, callback cleanup, etc., all look racy as heck. - * And why is cb_set an atomic? */ +static const struct rpc_program cb_program; + +static struct rpc_stat cb_stats = { + .program = &cb_program +}; + +#define NFS4_CALLBACK 0x40000000 +static const struct rpc_program cb_program = { + .name = "nfs4_cb", + .number = NFS4_CALLBACK, + .nrvers = ARRAY_SIZE(nfs_cb_version), + .version = nfs_cb_version, + .stats = &cb_stats, + .pipe_dir_name = "nfsd4_cb", +}; + +static int max_cb_time(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return max(nn->nfsd4_lease/10, (time_t)1) * HZ; +} + +static struct rpc_cred *callback_cred; + +int set_callback_cred(void) +{ + if (callback_cred) + return 0; + callback_cred = rpc_lookup_machine_cred("nfs"); + if (!callback_cred) + return -ENOMEM; + return 0; +} + +static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) +{ + if (clp->cl_minorversion == 0) { + return get_rpccred(callback_cred); + } else { + struct rpc_auth *auth = client->cl_auth; + struct auth_cred acred = {}; + + acred.uid = ses->se_cb_sec.uid; + acred.gid = ses->se_cb_sec.gid; + return auth->au_ops->lookup_cred(client->cl_auth, &acred, 0); + } +} -static int do_probe_callback(void *data) +static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) { - struct nfs4_client *clp = data; - struct sockaddr_in addr; - struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_xprt *xprt; + + if (args->protocol != XPRT_TRANSPORT_BC_TCP) + return rpc_create(args); + + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + + return rpc_create(args); +} + +static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) +{ + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = (NFSD_LEASE_TIME/4) * HZ, - .to_retries = 5, - .to_maxval = (NFSD_LEASE_TIME/2) * HZ, - .to_exponential = 1, + .to_initval = maxtime, + .to_retries = 0, + .to_maxval = maxtime, }; - struct rpc_program * program = &cb->cb_program; struct rpc_create_args args = { - .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .net = clp->net, + .address = (struct sockaddr *) &conn->cb_addr, + .addrsize = conn->cb_addrlen, + .saddress = (struct sockaddr *) &conn->cb_saddr, .timeout = &timeparms, - .program = program, - .version = nfs_cb_version[1]->number, - .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ - .flags = (RPC_CLNT_CREATE_NOPING), - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], - .rpc_argp = clp, + .program = &cb_program, + .version = 0, + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; - int status; - - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - - /* Initialize rpc_program */ - program->name = "nfs4_cb"; - program->number = cb->cb_prog; - program->nrvers = ARRAY_SIZE(nfs_cb_version); - program->version = nfs_cb_version; - program->stats = &cb->cb_stat; - - /* Initialize rpc_stat */ - memset(program->stats, 0, sizeof(cb->cb_stat)); - program->stats->program = program; + struct rpc_cred *cred; + if (clp->cl_minorversion == 0) { + if (!clp->cl_cred.cr_principal && + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) + return -EINVAL; + args.client_name = clp->cl_cred.cr_principal; + args.prognumber = conn->cb_prog, + args.protocol = XPRT_TRANSPORT_TCP; + args.authflavor = clp->cl_cred.cr_flavor; + clp->cl_cb_ident = conn->cb_ident; + } else { + if (!conn->cb_xprt) + return -EINVAL; + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; + clp->cl_cb_session = ses; + args.bc_xprt = conn->cb_xprt; + args.prognumber = clp->cl_cb_session->se_cb_prog; + args.protocol = XPRT_TRANSPORT_BC_TCP; + args.authflavor = ses->se_cb_sec.flavor; + } /* Create RPC client */ - client = rpc_create(&args); + client = create_backchannel_client(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client\n"); - status = PTR_ERR(client); - goto out_err; + dprintk("NFSD: couldn't create callback client: %ld\n", + PTR_ERR(client)); + return PTR_ERR(client); + } + cred = get_backchannel_cred(clp, client, ses); + if (IS_ERR(cred)) { + rpc_shutdown_client(client); + return PTR_ERR(cred); } + clp->cl_cb_client = client; + clp->cl_cb_cred = cred; + return 0; +} - status = rpc_call_sync(client, &msg, RPC_TASK_SOFT); +static void warn_no_callback_path(struct nfs4_client *clp, int reason) +{ + dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", + (int)clp->cl_name.len, clp->cl_name.data, reason); +} - if (status) - goto out_release_client; +static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_DOWN; + warn_no_callback_path(clp, reason); +} - cb->cb_client = client; - atomic_set(&cb->cb_set, 1); - put_nfs4_client(clp); - return 0; -out_release_client: - rpc_shutdown_client(client); -out_err: - put_nfs4_client(clp); - dprintk("NFSD: warning: no callback path to client %.*s\n", - (int)clp->cl_name.len, clp->cl_name.data); - return status; +static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_FAULT; + warn_no_callback_path(clp, reason); } -/* - * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... - */ -void -nfsd4_probe_callback(struct nfs4_client *clp) +static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + + if (task->tk_status) + nfsd4_mark_cb_down(clp, task->tk_status); + else + clp->cl_cb_state = NFSD4_CB_UP; +} + +static const struct rpc_call_ops nfsd4_cb_probe_ops = { + /* XXX: release method to ensure we set the cb channel down if + * necessary on early failure? */ + .rpc_call_done = nfsd4_cb_probe_done, +}; + +static struct workqueue_struct *callback_wq; + +static void run_nfsd4_cb(struct nfsd4_callback *cb) { - struct task_struct *t; + queue_work(callback_wq, &cb->cb_work); +} - BUG_ON(atomic_read(&clp->cl_callback.cb_set)); +static void do_probe_callback(struct nfs4_client *clp) +{ + struct nfsd4_callback *cb = &clp->cl_cb_null; - /* the task holds a reference to the nfs4_client struct */ - atomic_inc(&clp->cl_count); + cb->cb_op = NULL; + cb->cb_clp = clp; - t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; + cb->cb_msg.rpc_argp = NULL; + cb->cb_msg.rpc_resp = NULL; - if (IS_ERR(t)) - atomic_dec(&clp->cl_count); + cb->cb_ops = &nfsd4_cb_probe_ops; - return; + run_nfsd4_cb(cb); } /* - * called with dp->dl_count inc'ed. - * nfs4_lock_state() may or may not have been called. + * Poke the callback thread to process any updates to the callback + * parameters, and send a null probe. */ -void -nfsd4_cb_recall(struct nfs4_delegation *dp) -{ - struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_callback.cb_client; - struct nfs4_cb_recall *cbr = &dp->dl_recall; - struct rpc_message msg = { - .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_argp = cbr, - }; - int retries = 1; - int status = 0; - - cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ - cbr->cbr_dp = dp; - - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); - while (retries--) { - switch (status) { - case -EIO: - /* Network partition? */ - atomic_set(&clp->cl_callback.cb_set, 0); - case -EBADHANDLE: - case -NFS4ERR_BAD_STATEID: - /* Race: client probably got cb_recall - * before open reply granting delegation */ - break; - default: - goto out_put_cred; +void nfsd4_probe_callback(struct nfs4_client *clp) +{ + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); + do_probe_callback(clp); +} + +void nfsd4_probe_callback_sync(struct nfs4_client *clp) +{ + nfsd4_probe_callback(clp); + flush_workqueue(callback_wq); +} + +void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) +{ + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + spin_lock(&clp->cl_lock); + memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); + spin_unlock(&clp->cl_lock); +} + +/* + * There's currently a single callback channel slot. + * If the slot is available, then mark it busy. Otherwise, set the + * thread for sleeping on the callback RPC wait queue. + */ +static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) +{ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); + dprintk("%s slot is busy\n", __func__); + return false; + } + return true; +} + +/* + * TODO: cb_sequence should support referring call lists, cachethis, multiple + * slots, and mark callback channel down on communication errors. + */ +static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = cb->cb_clp; + u32 minorversion = clp->cl_minorversion; + + cb->cb_minorversion = minorversion; + if (minorversion) { + if (!nfsd41_cb_get_slot(clp, task)) + return; + } + spin_lock(&clp->cl_lock); + if (list_empty(&cb->cb_per_client)) { + /* This is the first call, not a restart */ + cb->cb_done = false; + list_add(&cb->cb_per_client, &clp->cl_callbacks); + } + spin_unlock(&clp->cl_lock); + rpc_call_start(task); +} + +static void nfsd4_cb_done(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = cb->cb_clp; + + dprintk("%s: minorversion=%d\n", __func__, + clp->cl_minorversion); + + if (clp->cl_minorversion) { + /* No need for lock, access serialized in nfsd4_cb_prepare */ + ++clp->cl_cb_session->se_cb_seq_nr; + clear_bit(0, &clp->cl_cb_slot_busy); + rpc_wake_up_next(&clp->cl_cb_waitq); + dprintk("%s: freed slot, new seqid=%d\n", __func__, + clp->cl_cb_session->se_cb_seq_nr); + + /* We're done looking into the sequence information */ + task->tk_msg.rpc_resp = NULL; + } +} + + +static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + struct nfs4_client *clp = cb->cb_clp; + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; + + nfsd4_cb_done(task, calldata); + + if (current_rpc_client != task->tk_client) { + /* We're shutting down or changing cl_cb_client; leave + * it to nfsd4_process_cb_update to restart the call if + * necessary. */ + return; + } + + if (cb->cb_done) + return; + switch (task->tk_status) { + case 0: + cb->cb_done = true; + return; + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: + /* Network partition? */ + nfsd4_mark_cb_down(clp, task->tk_status); + } + if (dp->dl_retries--) { + rpc_delay(task, 2*HZ); + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } + nfsd4_mark_cb_down(clp, task->tk_status); + cb->cb_done = true; +} + +static void nfsd4_cb_recall_release(void *calldata) +{ + struct nfsd4_callback *cb = calldata; + struct nfs4_client *clp = cb->cb_clp; + struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); + + if (cb->cb_done) { + spin_lock(&clp->cl_lock); + list_del(&cb->cb_per_client); + spin_unlock(&clp->cl_lock); + nfs4_put_delegation(dp); + } +} + +static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_prepare = nfsd4_cb_prepare, + .rpc_call_done = nfsd4_cb_recall_done, + .rpc_release = nfsd4_cb_recall_release, +}; + +int nfsd4_create_callback_queue(void) +{ + callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); + if (!callback_wq) + return -ENOMEM; + return 0; +} + +void nfsd4_destroy_callback_queue(void) +{ + destroy_workqueue(callback_wq); +} + +/* must be called under the state lock */ +void nfsd4_shutdown_callback(struct nfs4_client *clp) +{ + set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); + /* + * Note this won't actually result in a null callback; + * instead, nfsd4_do_callback_rpc() will detect the killed + * client, destroy the rpc client, and stop: + */ + do_probe_callback(clp); + flush_workqueue(callback_wq); +} + +static void nfsd4_release_cb(struct nfsd4_callback *cb) +{ + if (cb->cb_ops->rpc_release) + cb->cb_ops->rpc_release(cb); +} + +/* requires cl_lock: */ +static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) +{ + struct nfsd4_session *s; + struct nfsd4_conn *c; + + list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_flags & NFS4_CDFC4_BACK) + return c; } - ssleep(2); - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); } -out_put_cred: + return NULL; +} + +static void nfsd4_process_cb_update(struct nfsd4_callback *cb) +{ + struct nfs4_cb_conn conn; + struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = NULL; + struct nfsd4_conn *c; + int err; + /* - * Success or failure, now we're either waiting for lease expiration - * or deleg_return. + * This is either an update, or the client dying; in either case, + * kill the old client: */ - put_nfs4_client(clp); - nfs4_put_delegation(dp); - return; + if (clp->cl_cb_client) { + rpc_shutdown_client(clp->cl_cb_client); + clp->cl_cb_client = NULL; + put_rpccred(clp->cl_cb_cred); + clp->cl_cb_cred = NULL; + } + if (clp->cl_cb_conn.cb_xprt) { + svc_xprt_put(clp->cl_cb_conn.cb_xprt); + clp->cl_cb_conn.cb_xprt = NULL; + } + if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) + return; + spin_lock(&clp->cl_lock); + /* + * Only serialized callback code is allowed to clear these + * flags; main nfsd code can only set them: + */ + BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); + memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); + c = __nfsd4_find_backchannel(clp); + if (c) { + svc_xprt_get(c->cn_xprt); + conn.cb_xprt = c->cn_xprt; + ses = c->cn_session; + } + spin_unlock(&clp->cl_lock); + + err = setup_callback_client(clp, &conn, ses); + if (err) { + nfsd4_mark_cb_down(clp, err); + return; + } + /* Yay, the callback channel's back! Restart any callbacks: */ + list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) + run_nfsd4_cb(cb); +} + +static void nfsd4_do_callback_rpc(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); + struct nfs4_client *clp = cb->cb_clp; + struct rpc_clnt *clnt; + + if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) + nfsd4_process_cb_update(cb); + + clnt = clp->cl_cb_client; + if (!clnt) { + /* Callback channel broken, or client killed; give up: */ + nfsd4_release_cb(cb); + return; + } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + cb->cb_ops, cb); +} + +void nfsd4_init_callback(struct nfsd4_callback *cb) +{ + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); +} + +void nfsd4_cb_recall(struct nfs4_delegation *dp) +{ + struct nfsd4_callback *cb = &dp->dl_recall; + struct nfs4_client *clp = dp->dl_stid.sc_client; + + dp->dl_retries = 1; + cb->cb_op = dp; + cb->cb_clp = clp; + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; + cb->cb_msg.rpc_argp = cb; + cb->cb_msg.rpc_resp = cb; + + cb->cb_ops = &nfsd4_cb_recall_ops; + + INIT_LIST_HEAD(&cb->cb_per_client); + cb->cb_done = true; + + run_nfsd4_cb(&dp->dl_recall); } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 996bd88b75b..a0ab0a847d6 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -1,6 +1,4 @@ /* - * fs/nfsd/nfs4idmap.c - * * Mapping of UID/GIDs to name and vice versa. * * Copyright (c) 2002, 2003 The Regents of the University of @@ -35,23 +33,22 @@ */ #include <linux/module.h> -#include <linux/init.h> - -#include <linux/mm.h> -#include <linux/utsname.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/sunrpc/clnt.h> -#include <linux/nfs.h> -#include <linux/nfs4.h> -#include <linux/nfs_fs.h> -#include <linux/nfs_page.h> -#include <linux/sunrpc/cache.h> -#include <linux/nfsd_idmap.h> -#include <linux/list.h> -#include <linux/time.h> #include <linux/seq_file.h> -#include <linux/sunrpc/svcauth.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sunrpc/svc_xprt.h> +#include <net/net_namespace.h> +#include "idmap.h" +#include "nfsd.h" +#include "netns.h" + +/* + * Turn off idmapping when using AUTH_SYS. + */ +static bool nfs4_disable_idmapping = true; +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off server's NFSv4 idmapping when using 'sec=sys'"); /* * Cache entry @@ -68,7 +65,7 @@ struct ent { struct cache_head h; int type; /* User / Group */ - uid_t id; + u32 id; char name[IDMAP_NAMESZ]; char authname[IDMAP_NAMESZ]; }; @@ -77,7 +74,6 @@ struct ent { #define ENT_HASHBITS 8 #define ENT_HASHMAX (1 << ENT_HASHBITS) -#define ENT_HASHMASK (ENT_HASHMAX - 1) static void ent_init(struct cache_head *cnew, struct cache_head *citm) @@ -113,8 +109,6 @@ ent_alloc(void) * ID -> Name cache */ -static struct cache_head *idtoname_table[ENT_HASHMAX]; - static uint32_t idtoname_hash(struct ent *ent) { @@ -175,21 +169,21 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static void -warn_no_idmapd(struct cache_detail *detail) +warn_no_idmapd(struct cache_detail *detail, int has_died) { printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", - detail->last_close? "died" : "not been started"); + has_died ? "died" : "not been started"); } static int idtoname_parse(struct cache_detail *, char *, int); -static struct ent *idtoname_lookup(struct ent *); -static struct ent *idtoname_update(struct ent *, struct ent *); +static struct ent *idtoname_lookup(struct cache_detail *, struct ent *); +static struct ent *idtoname_update(struct cache_detail *, struct ent *, + struct ent *); -static struct cache_detail idtoname_cache = { +static struct cache_detail idtoname_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, - .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, .cache_request = idtoname_request, @@ -202,7 +196,7 @@ static struct cache_detail idtoname_cache = { .alloc = ent_alloc, }; -int +static int idtoname_parse(struct cache_detail *cd, char *buf, int buflen) { struct ent ent, *res; @@ -244,7 +238,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) goto out; error = -ENOMEM; - res = idtoname_lookup(&ent); + res = idtoname_lookup(cd, &ent); if (!res) goto out; @@ -260,11 +254,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) else memcpy(ent.name, buf1, sizeof(ent.name)); error = -ENOMEM; - res = idtoname_update(&ent, res); + res = idtoname_update(cd, &ent, res); if (res == NULL) goto out; - cache_put(&res->h, &idtoname_cache); + cache_put(&res->h, cd); error = 0; out: @@ -275,10 +269,9 @@ out: static struct ent * -idtoname_lookup(struct ent *item) +idtoname_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(&idtoname_cache, - &item->h, + struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, idtoname_hash(item)); if (ch) return container_of(ch, struct ent, h); @@ -287,10 +280,9 @@ idtoname_lookup(struct ent *item) } static struct ent * -idtoname_update(struct ent *new, struct ent *old) +idtoname_update(struct cache_detail *cd, struct ent *new, struct ent *old) { - struct cache_head *ch = sunrpc_cache_update(&idtoname_cache, - &new->h, &old->h, + struct cache_head *ch = sunrpc_cache_update(cd, &new->h, &old->h, idtoname_hash(new)); if (ch) return container_of(ch, struct ent, h); @@ -303,8 +295,6 @@ idtoname_update(struct ent *new, struct ent *old) * Name -> ID cache */ -static struct cache_head *nametoid_table[ENT_HASHMAX]; - static inline int nametoid_hash(struct ent *ent) { @@ -353,14 +343,14 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) return 0; } -static struct ent *nametoid_lookup(struct ent *); -static struct ent *nametoid_update(struct ent *, struct ent *); +static struct ent *nametoid_lookup(struct cache_detail *, struct ent *); +static struct ent *nametoid_update(struct cache_detail *, struct ent *, + struct ent *); static int nametoid_parse(struct cache_detail *, char *, int); -static struct cache_detail nametoid_cache = { +static struct cache_detail nametoid_cache_template = { .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, - .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, .cache_request = nametoid_request, @@ -420,14 +410,14 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) set_bit(CACHE_NEGATIVE, &ent.h.flags); error = -ENOMEM; - res = nametoid_lookup(&ent); + res = nametoid_lookup(cd, &ent); if (res == NULL) goto out; - res = nametoid_update(&ent, res); + res = nametoid_update(cd, &ent, res); if (res == NULL) goto out; - cache_put(&res->h, &nametoid_cache); + cache_put(&res->h, cd); error = 0; out: kfree(buf1); @@ -437,10 +427,9 @@ out: static struct ent * -nametoid_lookup(struct ent *item) +nametoid_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(&nametoid_cache, - &item->h, + struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, nametoid_hash(item)); if (ch) return container_of(ch, struct ent, h); @@ -449,10 +438,9 @@ nametoid_lookup(struct ent *item) } static struct ent * -nametoid_update(struct ent *new, struct ent *old) +nametoid_update(struct cache_detail *cd, struct ent *new, struct ent *old) { - struct cache_head *ch = sunrpc_cache_update(&nametoid_cache, - &new->h, &old->h, + struct cache_head *ch = sunrpc_cache_update(cd, &new->h, &old->h, nametoid_hash(new)); if (ch) return container_of(ch, struct ent, h); @@ -465,129 +453,67 @@ nametoid_update(struct ent *new, struct ent *old) */ int -nfsd_idmap_init(void) +nfsd_idmap_init(struct net *net) { int rv; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - rv = cache_register(&idtoname_cache); + nn->idtoname_cache = cache_create_net(&idtoname_cache_template, net); + if (IS_ERR(nn->idtoname_cache)) + return PTR_ERR(nn->idtoname_cache); + rv = cache_register_net(nn->idtoname_cache, net); if (rv) - return rv; - rv = cache_register(&nametoid_cache); + goto destroy_idtoname_cache; + nn->nametoid_cache = cache_create_net(&nametoid_cache_template, net); + if (IS_ERR(nn->nametoid_cache)) { + rv = PTR_ERR(nn->nametoid_cache); + goto unregister_idtoname_cache; + } + rv = cache_register_net(nn->nametoid_cache, net); if (rv) - cache_unregister(&idtoname_cache); + goto destroy_nametoid_cache; + return 0; + +destroy_nametoid_cache: + cache_destroy_net(nn->nametoid_cache, net); +unregister_idtoname_cache: + cache_unregister_net(nn->idtoname_cache, net); +destroy_idtoname_cache: + cache_destroy_net(nn->idtoname_cache, net); return rv; } void -nfsd_idmap_shutdown(void) -{ - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); -} - -/* - * Deferred request handling - */ - -struct idmap_defer_req { - struct cache_req req; - struct cache_deferred_req deferred_req; - wait_queue_head_t waitq; - atomic_t count; -}; - -static inline void -put_mdr(struct idmap_defer_req *mdr) -{ - if (atomic_dec_and_test(&mdr->count)) - kfree(mdr); -} - -static inline void -get_mdr(struct idmap_defer_req *mdr) -{ - atomic_inc(&mdr->count); -} - -static void -idmap_revisit(struct cache_deferred_req *dreq, int toomany) -{ - struct idmap_defer_req *mdr = - container_of(dreq, struct idmap_defer_req, deferred_req); - - wake_up(&mdr->waitq); - put_mdr(mdr); -} - -static struct cache_deferred_req * -idmap_defer(struct cache_req *req) -{ - struct idmap_defer_req *mdr = - container_of(req, struct idmap_defer_req, req); - - mdr->deferred_req.revisit = idmap_revisit; - get_mdr(mdr); - return (&mdr->deferred_req); -} - -static inline int -do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, - struct cache_detail *detail, struct ent **item, - struct idmap_defer_req *mdr) +nfsd_idmap_shutdown(struct net *net) { - *item = lookup_fn(key); - if (!*item) - return -ENOMEM; - return cache_check(detail, &(*item)->h, &mdr->req); -} + struct nfsd_net *nn = net_generic(net, nfsd_net_id); -static inline int -do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), - struct ent *key, struct cache_detail *detail, - struct ent **item) -{ - int ret = -ENOMEM; - - *item = lookup_fn(key); - if (!*item) - goto out_err; - ret = -ETIMEDOUT; - if (!test_bit(CACHE_VALID, &(*item)->h.flags) - || (*item)->h.expiry_time < get_seconds() - || detail->flush_time > (*item)->h.last_refresh) - goto out_put; - ret = -ENOENT; - if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) - goto out_put; - return 0; -out_put: - cache_put(&(*item)->h, detail); -out_err: - *item = NULL; - return ret; + cache_unregister_net(nn->idtoname_cache, net); + cache_unregister_net(nn->nametoid_cache, net); + cache_destroy_net(nn->idtoname_cache, net); + cache_destroy_net(nn->nametoid_cache, net); } static int idmap_lookup(struct svc_rqst *rqstp, - struct ent *(*lookup_fn)(struct ent *), struct ent *key, - struct cache_detail *detail, struct ent **item) + struct ent *(*lookup_fn)(struct cache_detail *, struct ent *), + struct ent *key, struct cache_detail *detail, struct ent **item) { - struct idmap_defer_req *mdr; int ret; - mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); - if (!mdr) + *item = lookup_fn(detail, key); + if (!*item) return -ENOMEM; - atomic_set(&mdr->count, 1); - init_waitqueue_head(&mdr->waitq); - mdr->req.defer = idmap_defer; - ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); - if (ret == -EAGAIN) { - wait_event_interruptible_timeout(mdr->waitq, - test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); - ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); + retry: + ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle); + + if (ret == -ETIMEDOUT) { + struct ent *prev_item = *item; + *item = lookup_fn(detail, key); + if (*item != prev_item) + goto retry; + cache_put(&(*item)->h, detail); } - put_mdr(mdr); return ret; } @@ -600,74 +526,145 @@ rqst_authname(struct svc_rqst *rqstp) return clp->name; } -static int +static __be32 idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, - uid_t *id) + u32 *id) { struct ent *item, key = { .type = type, }; int ret; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if (namelen + 1 > sizeof(key.name)) - return -EINVAL; + return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); - ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); + ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) - ret = -ESRCH; /* nfserr_badname */ + return nfserr_badowner; if (ret) - return ret; + return nfserrno(ret); *id = item->id; - cache_put(&item->h, &nametoid_cache); + cache_put(&item->h, nn->nametoid_cache); return 0; } -static int -idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id) +{ + char buf[11]; + int len; + __be32 *p; + + len = sprintf(buf, "%u", id); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, buf, len); + return 0; +} + +static __be32 idmap_id_to_name(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) { struct ent *item, key = { .id = id, .type = type, }; + __be32 *p; int ret; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); - ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); + ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) - return sprintf(name, "%u", id); + return encode_ascii_id(xdr, id); if (ret) - return ret; + return nfserrno(ret); ret = strlen(item->name); - BUG_ON(ret > IDMAP_NAMESZ); - memcpy(name, item->name, ret); - cache_put(&item->h, &idtoname_cache); - return ret; + WARN_ON_ONCE(ret > IDMAP_NAMESZ); + p = xdr_reserve_space(xdr, ret + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, item->name, ret); + cache_put(&item->h, nn->idtoname_cache); + return 0; } -int +static bool +numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) +{ + int ret; + char buf[11]; + + if (namelen + 1 > sizeof(buf)) + /* too long to represent a 32-bit id: */ + return false; + /* Just to make sure it's null-terminated: */ + memcpy(buf, name, namelen); + buf[namelen] = '\0'; + ret = kstrtouint(buf, 10, id); + return ret == 0; +} + +static __be32 +do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id) +{ + if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + if (numeric_name_to_id(rqstp, type, name, namelen, id)) + return 0; + /* + * otherwise, fall through and try idmapping, for + * backwards compatibility with clients sending names: + */ + return idmap_name_to_id(rqstp, type, name, namelen, id); +} + +static __be32 encode_name_from_id(struct xdr_stream *xdr, + struct svc_rqst *rqstp, int type, u32 id) +{ + if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) + return encode_ascii_id(xdr, id); + return idmap_id_to_name(xdr, rqstp, type, id); +} + +__be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kuid_t *uid) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); + *uid = make_kuid(&init_user_ns, id); + if (!uid_valid(*uid)) + status = nfserr_badowner; + return status; } -int +__be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, - __u32 *id) + kgid_t *gid) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); + __be32 status; + u32 id = -1; + status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); + *gid = make_kgid(&init_user_ns, id); + if (!gid_valid(*gid)) + status = nfserr_badowner; + return status; } -int -nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kuid_t uid) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); + u32 id = from_kuid(&init_user_ns, uid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id); } -int -nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp, + kgid_t gid) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); + u32 id = from_kgid(&init_user_ns, gid); + return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c593db047d8..8f029db5d27 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1,6 +1,4 @@ /* - * fs/nfsd/nfs4proc.c - * * Server-side procedures for NFSv4. * * Copyright (c) 2002 The Regents of the University of Michigan. @@ -34,23 +32,145 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#include <linux/param.h> -#include <linux/major.h> -#include <linux/slab.h> #include <linux/file.h> +#include <linux/slab.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfs4.h> -#include <linux/nfsd/state.h> -#include <linux/nfsd/xdr4.h> -#include <linux/nfs4_acl.h> -#include <linux/sunrpc/gss_api.h> +#include "idmap.h" +#include "cache.h" +#include "xdr4.h" +#include "vfs.h" +#include "current_stateid.h" +#include "netns.h" +#include "acl.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC +static u32 nfsd_attrmask[] = { + NFSD_WRITEABLE_ATTRS_WORD0, + NFSD_WRITEABLE_ATTRS_WORD1, + NFSD_WRITEABLE_ATTRS_WORD2 +}; + +static u32 nfsd41_ex_attrmask[] = { + NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2 +}; + +static __be32 +check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + u32 *bmval, u32 *writable) +{ + struct dentry *dentry = cstate->current_fh.fh_dentry; + + /* + * Check about attributes are supported by the NFSv4 server or not. + * According to spec, unsupported attributes return ERR_ATTRNOTSUPP. + */ + if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) || + (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) || + (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) + return nfserr_attrnotsupp; + + /* + * Check FATTR4_WORD0_ACL can be supported + * in current environment or not. + */ + if (bmval[0] & FATTR4_WORD0_ACL) { + if (!IS_POSIXACL(dentry->d_inode)) + return nfserr_attrnotsupp; + } + + /* + * According to spec, read-only attributes return ERR_INVAL. + */ + if (writable) { + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || + (bmval[2] & ~writable[2])) + return nfserr_inval; + } + + return nfs_ok; +} + +static __be32 +nfsd4_check_open_attributes(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + __be32 status = nfs_ok; + + if (open->op_create == NFS4_OPEN_CREATE) { + if (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd_attrmask); + else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd41_ex_attrmask); + } + + return status; +} + +static int +is_create_with_attrs(struct nfsd4_open *open) +{ + return open->op_create == NFS4_OPEN_CREATE + && (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED + || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); +} + +/* + * if error occurs when setting the acl, just clear the acl bit + * in the returned attr bitmap. + */ +static void +do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl, u32 *bmval) +{ + __be32 status; + + status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); + if (status) + /* + * We should probably fail the whole open at this point, + * but we've already created the file, so it's too late; + * So this seems the least of evils: + */ + bmval[0] &= ~FATTR4_WORD0_ACL; +} + static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -70,77 +190,131 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; + accmode |= NFSD_MAY_READ_IF_EXEC; + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - accmode |= MAY_READ; + accmode |= NFSD_MAY_READ; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - accmode |= (MAY_WRITE | MAY_TRUNC); - if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) - accmode |= MAY_WRITE; + accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC); + if (open->op_share_deny & NFS4_SHARE_DENY_READ) + accmode |= NFSD_MAY_WRITE; status = fh_verify(rqstp, current_fh, S_IFREG, accmode); return status; } +static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) +{ + umode_t mode = fh->fh_dentry->d_inode->i_mode; + + if (S_ISREG(mode)) + return nfs_ok; + if (S_ISDIR(mode)) + return nfserr_isdir; + /* + * Using err_symlink as our catch-all case may look odd; but + * there's no other obvious error for this case in 4.0, and we + * happen to know that it will cause the linux v4 client to do + * the right thing on attempts to open something other than a + * regular file. + */ + return nfserr_symlink; +} + +static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) +{ + if (nfsd4_has_session(cstate)) + return; + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, + &resfh->fh_handle); +} + static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { - struct svc_fh resfh; + struct svc_fh *current_fh = &cstate->current_fh; + int accmode; __be32 status; - int created = 0; - fh_init(&resfh, NFS4_FHSIZE); + *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + if (!*resfh) + return nfserr_jukebox; + fh_init(*resfh, NFS4_FHSIZE); open->op_truncate = 0; if (open->op_create) { + /* FIXME: check session persistence and pnfs flags. + * The nfsv4.1 spec requires the following semantics: + * + * Persistent | pNFS | Server REQUIRED | Client Allowed + * Reply Cache | server | | + * -------------+--------+-----------------+-------------------- + * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * | | | (SHOULD) + * | | and EXCLUSIVE4 | or EXCLUSIVE4 + * | | | (SHOULD NOT) + * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1 + * yes | no | GUARDED4 | GUARDED4 + * yes | yes | GUARDED4 | GUARDED4 + */ + /* * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3. + * in NFSv4 as in v3 except EXCLUSIVE4_1. */ - status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, - &resfh, open->op_createmode, + *resfh, open->op_createmode, (u32 *)open->op_verf.data, - &open->op_truncate, &created); + &open->op_truncate, &open->op_created); + + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); - /* If we ever decide to use different attrs to store the - * verifier in nfsd_create_v3, then we'll need to change this + /* + * Following rfc 3530 14.2.16, use the returned bitmask + * to indicate which attributes we used to store the + * verifier: */ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) - open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); - } else { + open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY); + } else + /* + * Note this may exit with the parent still locked. + * We will hold the lock until nfsd4_open's final + * lookup, to prevent renames or unlinks until we've had + * a chance to an acquire a delegation if appropriate. + */ status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, &resfh); - fh_unlock(current_fh); - } + open->op_fname.data, open->op_fname.len, *resfh); + if (status) + goto out; + status = nfsd_check_obj_isreg(*resfh); if (status) goto out; - set_change_info(&open->op_cinfo, current_fh); - - /* set reply cache */ - fh_dup2(current_fh, &resfh); - open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; - memcpy(open->op_stateowner->so_replay.rp_openfh, - &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); - - if (!created) - status = do_open_permission(rqstp, current_fh, open, MAY_NOP); + if (is_create_with_attrs(open) && open->op_acl != NULL) + do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); + nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); + accmode = NFSD_MAY_NOP; + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, *resfh, open, accmode); + set_change_info(&open->op_cinfo, current_fh); out: - fh_put(&resfh); return status; } static __be32 -do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { + struct svc_fh *current_fh = &cstate->current_fh; __be32 status; - - /* Only reclaims from previously confirmed clients are valid */ - if ((status = nfs4_check_open_reclaim(&open->op_clientid))) - return status; + int accmode = 0; /* We don't know the target directory, and therefore can not * set the change info @@ -148,45 +322,80 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); - /* set replay cache */ - open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; - memcpy(open->op_stateowner->so_replay.rp_openfh, - ¤t_fh->fh_handle.fh_base, - current_fh->fh_handle.fh_size); + nfsd4_set_open_owner_reply_cache(cstate, open, current_fh); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); + /* + * In the delegation case, the client is telling us about an + * open that it *already* performed locally, some time ago. We + * should let it succeed now if possible. + * + * In the case of a CLAIM_FH open, on the other hand, the client + * may be counting on us to enforce permissions (the Linux 4.1 + * client uses this for normal opens, for example). + */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) + accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, accmode); return status; } +static void +copy_clientid(clientid_t *clid, struct nfsd4_session *session) +{ + struct nfsd4_sessionid *sid = + (struct nfsd4_sessionid *)session->se_sessionid.data; + + clid->cl_boot = sid->clientid.cl_boot; + clid->cl_id = sid->clientid.cl_id; +} static __be32 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { __be32 status; - dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", + struct svc_fh *resfh = NULL; + struct nfsd4_compoundres *resp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, - open->op_stateowner); + open->op_openowner); /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + open->op_created = 0; + /* + * RFC5661 18.51.3 + * Before RECLAIM_COMPLETE done, server should deny new lock + */ + if (nfsd4_has_session(cstate) && + !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags) && + open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + + if (nfsd4_has_session(cstate)) + copy_clientid(&open->op_clientid, cstate->session); + nfs4_lock_state(); /* check seqid for replay. set nfs4_owner */ - status = nfsd4_process_open1(open); + resp = rqstp->rq_resp; + status = nfsd4_process_open1(&resp->cstate, open, nn); if (status == nfserr_replay_me) { - struct nfs4_replay *rp = &open->op_stateowner->so_replay; + struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); - cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; - memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, - rp->rp_openfh_len); - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + fh_copy_shallow(&cstate->current_fh.fh_handle, + &rp->rp_openfh); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) dprintk("nfsd4_open: replay failed" " restoring previous filehandle\n"); @@ -195,49 +404,47 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (status) goto out; + if (open->op_xdr_error) { + status = open->op_xdr_error; + goto out; + } + + status = nfsd4_check_open_attributes(rqstp, cstate, open); + if (status) + goto out; /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: - status = nfserr_inval; - if (open->op_create) - goto out; - /* fall through */ case NFS4_OPEN_CLAIM_NULL: - /* - * (1) set CURRENT_FH to the file being opened, - * creating it if necessary, (2) set open->op_cinfo, - * (3) set open->op_truncate if the file is to be - * truncated after opening, (4) do permission checking. - */ - status = do_open_lookup(rqstp, &cstate->current_fh, - open); + status = do_open_lookup(rqstp, cstate, open, &resfh); if (status) goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_stateowner->so_confirmed = 1; - /* - * The CURRENT_FH is already set to the file being - * opened. (1) set open->op_cinfo, (2) set - * open->op_truncate if the file is to be truncated - * after opening, (3) do permission checking. - */ - status = do_open_fhandle(rqstp, &cstate->current_fh, - open); + status = nfs4_check_open_reclaim(&open->op_clientid, + cstate->minorversion, + nn); + if (status) + goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; + resfh = &cstate->current_fh; break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_stateowner->so_confirmed = 1; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; @@ -253,17 +460,42 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * successful, it (1) truncates the file if open->op_truncate was * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ - status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); + status = nfsd4_process_open2(rqstp, resfh, open); + WARN_ON(status && open->op_created); out: - if (open->op_stateowner) { - nfs4_get_stateowner(open->op_stateowner); - cstate->replay_owner = open->op_stateowner; + if (resfh && resfh != &cstate->current_fh) { + fh_dup2(&cstate->current_fh, resfh); + fh_put(resfh); + kfree(resfh); } - nfs4_unlock_state(); + nfsd4_cleanup_open_state(open, status); + if (open->op_openowner && !nfsd4_has_session(cstate)) + cstate->replay_owner = &open->op_openowner->oo_owner; + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } /* + * OPEN is the only seqid-mutating operation whose decoding can fail + * with a seqid-mutating error (specifically, decoding of user names in + * the attributes). Therefore we have to do some processing to look up + * the stateowner so that we can bump the seqid. + */ +static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op) +{ + struct nfsd4_open *open = (struct nfsd4_open *)&op->u; + + if (!seqid_mutating_err(ntohl(op->status))) + return op->status; + if (nfsd4_has_session(cstate)) + return op->status; + open->op_xdr_error = op->status; + return nfsd4_open(rqstp, cstate, open); +} + +/* * filehandle-manipulating ops. */ static __be32 @@ -285,7 +517,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); } static __be32 @@ -307,6 +539,10 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); + if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } return nfs_ok; } @@ -318,6 +554,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; fh_dup2(&cstate->save_fh, &cstate->current_fh); + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + } return nfs_ok; } @@ -336,21 +576,23 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } +static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) +{ + __be32 verf[2]; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + verf[0] = (__be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + memcpy(verifier->data, verf, sizeof(verifier->data)); +} + static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - __be32 status; - - u32 *p = (u32 *)commit->co_verf.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; - - status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, + gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); + return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); - if (status == nfserr_symlink) - status = nfserr_inval; - return status; } static __be32 @@ -363,23 +605,18 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_init(&resfh, NFS4_FHSIZE); - status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE); - if (status == nfserr_symlink) - status = nfserr_notdir; + status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, + NFSD_MAY_CREATE); + if (status) + return status; + + status = check_attr_support(rqstp, cstate, create->cr_bmval, + nfsd_attrmask); if (status) return status; switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. - */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, @@ -429,12 +666,20 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_badtype; } - if (!status) { - fh_unlock(&cstate->current_fh); - set_change_info(&create->cr_cinfo, &cstate->current_fh); - fh_dup2(&cstate->current_fh, &resfh); - } + if (status) + goto out; + + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, + create->cr_bmval); + + fh_unlock(&cstate->current_fh); + set_change_info(&create->cr_cinfo, &cstate->current_fh); + fh_dup2(&cstate->current_fh, &resfh); +out: fh_put(&resfh); return status; } @@ -445,15 +690,16 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; - getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); getattr->ga_fhp = &cstate->current_fh; return nfs_ok; @@ -474,9 +720,7 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } -static __be32 -nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - void *arg) +static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh) { struct svc_fh tmp_fh; __be32 ret; @@ -485,13 +729,19 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = exp_pseudoroot(rqstp, &tmp_fh); if (ret) return ret; - if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { + if (tmp_fh.fh_dentry == fh->fh_dentry) { fh_put(&tmp_fh); return nfserr_noent; } fh_put(&tmp_fh); - return nfsd_lookup(rqstp, &cstate->current_fh, - "..", 2, &cstate->current_fh); + return nfsd_lookup(rqstp, fh, "..", 2, fh); +} + +static __be32 +nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + void *arg) +{ + return nfsd4_do_lookupp(rqstp, &cstate->current_fh); } static __be32 @@ -515,19 +765,26 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); + /* + * If we do a zero copy read, then a client will see read data + * that reflects the state of the file *after* performing the + * following compound. + * + * To ensure proper ordering, we therefore turn off zero copy if + * the client wants us to do more in this compound: + */ + if (!nfsd4_last_compound_op(rqstp)) + rqstp->rq_splice_ok = false; + /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &read->rd_stateid, - CHECK_FH | RD_STATE, &read->rd_filp))) { + if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, &read->rd_stateid, + RD_STATE, &read->rd_filp))) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -545,10 +802,11 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) return nfserr_inval; - readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; - readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); + readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); + readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); - if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || + if ((cookie == 1) || (cookie == 2) || (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) return nfserr_bad_cookie; @@ -572,12 +830,10 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (status == nfserr_symlink) - return nfserr_notdir; if (!status) { fh_unlock(&cstate->current_fh); set_change_info(&remove->rm_cinfo, &cstate->current_fh); @@ -593,29 +849,17 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags - & NFSEXP_NOSUBTREECHECK)) + if (locks_in_grace(SVC_NET(rqstp)) && + !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, rename->rn_snamelen, &cstate->current_fh, rename->rn_tname, rename->rn_tnamelen); - - /* the underlying filesystem returns different error's than required - * by NFSv4. both save_fh and current_fh have been verified.. */ - if (status == nfserr_isdir) - status = nfserr_exist; - else if ((status == nfserr_notdir) && - (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && - S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) - status = nfserr_exist; - else if (status == nfserr_symlink) - status = nfserr_notdir; - - if (!status) { - set_change_info(&rename->rn_sinfo, &cstate->current_fh); - set_change_info(&rename->rn_tinfo, &cstate->save_fh); - } - return status; + if (status) + return status; + set_change_info(&rename->rn_sinfo, &cstate->current_fh); + set_change_info(&rename->rn_tinfo, &cstate->save_fh); + return nfs_ok; } static __be32 @@ -628,6 +872,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); @@ -639,76 +886,132 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } else secinfo->si_exp = exp; dput(dentry); + if (cstate->minorversion) + /* See rfc 5661 section 2.6.3.1.1.8 */ + fh_put(&cstate->current_fh); return err; } static __be32 +nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_secinfo_no_name *sin) +{ + __be32 err; + + switch (sin->sin_style) { + case NFS4_SECINFO_STYLE4_CURRENT_FH: + break; + case NFS4_SECINFO_STYLE4_PARENT: + err = nfsd4_do_lookupp(rqstp, &cstate->current_fh); + if (err) + return err; + break; + default: + return nfserr_inval; + } + exp_get(cstate->current_fh.fh_export); + sin->sin_exp = cstate->current_fh.fh_export; + fh_put(&cstate->current_fh); + return nfs_ok; +} + +static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) { __be32 status = nfs_ok; + int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); - nfs4_unlock_state(); + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, + &setattr->sa_stateid, WR_STATE, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; } } + err = fh_want_write(&cstate->current_fh); + if (err) + return nfserrno(err); status = nfs_ok; + + status = check_attr_support(rqstp, cstate, setattr->sa_bmval, + nfsd_attrmask); + if (status) + goto out; + if (setattr->sa_acl != NULL) status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, setattr->sa_acl); if (status) - return status; + goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); +out: + fh_drop_write(&cstate->current_fh); return status; } +static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write) +{ + int i = 1; + int buflen = write->wr_buflen; + + vec[0].iov_base = write->wr_head.iov_base; + vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len); + buflen -= vec[0].iov_len; + + while (buflen) { + vec[i].iov_base = page_address(write->wr_pagelist[i - 1]); + vec[i].iov_len = min_t(int, PAGE_SIZE, buflen); + buflen -= vec[i].iov_len; + i++; + } + return i; +} + static __be32 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_write *write) { stateid_t *stateid = &write->wr_stateid; struct file *filp = NULL; - u32 *p; __be32 status = nfs_ok; + unsigned long cnt; + int nvecs; /* no need to check permission - this will be done in nfsd_write() */ if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, - CHECK_FH | WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - + status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), + cstate, stateid, WR_STATE, &filp); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } - write->wr_bytes_written = write->wr_buflen; + cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; - p = (u32 *)write->wr_verifier.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; + gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); + + nvecs = fill_in_write_vector(rqstp->rq_vec, write); + WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, write->wr_vlen, - write->wr_buflen, &write->wr_how_written); + write->wr_offset, rqstp->rq_vec, nvecs, + &cnt, &write->wr_how_written); if (filp) fput(filp); - if (status == nfserr_symlink) - status = nfserr_inval; + write->wr_bytes_written = cnt; + return status; } @@ -725,13 +1028,14 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int count; __be32 status; - status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + if (status) + return status; + + status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL); if (status) return status; - if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) - || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) - return nfserr_attrnotsupp; if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) return nfserr_inval; @@ -744,21 +1048,25 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, count = 4 + (verify->ve_attrlen >> 2); buf = kmalloc(count << 2, GFP_KERNEL); if (!buf) - return nfserr_resource; + return nfserr_jukebox; - status = nfsd4_encode_fattr(&cstate->current_fh, + p = buf; + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); - - /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + /* + * If nfsd4_encode_fattr() ran out of space, assume that's because + * the attributes are longer (hence different) than those given: + */ + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; - p = buf + 3; + /* skip bitmap */ + p = buf + 1 + ntohl(buf[0]); status = nfserr_not_same; if (ntohl(*p++) != verify->ve_attrlen) goto out_kfree; @@ -805,43 +1113,153 @@ static inline void nfsd4_increment_op_stats(u32 opnum) nfsdstats.nfs4_opcount[opnum]++; } -static void cstate_free(struct nfsd4_compound_state *cstate) -{ - if (cstate == NULL) - return; - fh_put(&cstate->current_fh); - fh_put(&cstate->save_fh); - BUG_ON(cstate->replay_owner); - kfree(cstate); -} - -static struct nfsd4_compound_state *cstate_alloc(void) -{ - struct nfsd4_compound_state *cstate; - - cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL); - if (cstate == NULL) - return NULL; - fh_init(&cstate->current_fh, NFS4_FHSIZE); - fh_init(&cstate->save_fh, NFS4_FHSIZE); - cstate->replay_owner = NULL; - return cstate; -} - typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); +typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); +typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *); +typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *); + +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, +}; struct nfsd4_operation { nfsd4op_func op_func; u32 op_flags; -/* Most ops require a valid current filehandle; a few don't: */ -#define ALLOWED_WITHOUT_FH 1 -/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ -#define ALLOWED_ON_ABSENT_FS 2 + char *op_name; + /* Try to get response size before operation */ + nfsd4op_rsize op_rsize_bop; + stateid_getter op_get_currentstateid; + stateid_setter op_set_currentstateid; }; static struct nfsd4_operation nfsd4_ops[]; +static const char *nfsd4_op_name(unsigned opnum); + +/* + * Enforce NFSv4.1 COMPOUND ordering rules: + * + * Also note, enforced elsewhere: + * - SEQUENCE other than as first op results in + * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) + * - BIND_CONN_TO_SESSION must be the only op in its compound. + * (Enforced in nfsd4_bind_conn_to_session().) + * - DESTROY_SESSION must be the final operation in a compound, if + * sessionid's in SEQUENCE and DESTROY_SESSION are the same. + * (Enforced in nfsd4_destroy_session().) + */ +static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) +{ + struct nfsd4_op *op = &args->ops[0]; + + /* These ordering requirements don't apply to NFSv4.0: */ + if (args->minorversion == 0) + return nfs_ok; + /* This is weird, but OK, not our problem: */ + if (args->opcnt == 0) + return nfs_ok; + if (op->status == nfserr_op_illegal) + return nfs_ok; + if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP)) + return nfserr_op_not_in_session; + if (op->opnum == OP_SEQUENCE) + return nfs_ok; + if (args->opcnt != 1) + return nfserr_not_only_op; + return nfs_ok; +} + +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + +bool nfsd4_cache_this_op(struct nfsd4_op *op) +{ + if (op->opnum == OP_ILLEGAL) + return false; + return OPDESC(op)->op_flags & OP_CACHEME; +} + +static bool need_wrongsec_check(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *next = &argp->ops[resp->opcnt]; + struct nfsd4_operation *thisd; + struct nfsd4_operation *nextd; + + thisd = OPDESC(this); + /* + * Most ops check wronsec on our own; only the putfh-like ops + * have special rules. + */ + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) + return false; + /* + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a + * put-filehandle operation if we're not going to use the + * result: + */ + if (argp->opcnt == resp->opcnt) + return false; + + nextd = OPDESC(next); + /* + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC + * errors themselves as necessary; others should check for them + * now: + */ + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); +} + +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->iov = head; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +} + /* * COMPOUND call. */ @@ -852,89 +1270,129 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, { struct nfsd4_op *op; struct nfsd4_operation *opdesc; - struct nfsd4_compound_state *cstate = NULL; - int slack_bytes; + struct nfsd4_compound_state *cstate = &resp->cstate; + struct svc_fh *current_fh = &cstate->current_fh; + struct svc_fh *save_fh = &cstate->save_fh; __be32 status; - status = nfserr_resource; - cstate = cstate_alloc(); - if (cstate == NULL) - goto out; - - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + svcxdr_init_encode(rqstp, resp); + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; + cstate->minorversion = args->minorversion; + fh_init(current_fh, NFS4_FHSIZE); + fh_init(save_fh, NFS4_FHSIZE); + /* + * Don't use the deferral mechanism for NFSv4; compounds make it + * too hard to avoid non-idempotency problems. + */ + rqstp->rq_usedeferral = 0; /* * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; - status = nfs_ok; + status = nfs41_check_op_ordering(args); + if (status) { + op = &args->ops[0]; + op->status = status; + goto encode_op; + } + while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum); - + dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", + resp->opcnt, args->opcnt, op->opnum, + nfsd4_op_name(op->opnum)); /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error * it will be set to nfserr_bad_xdr. */ - if (op->status) - goto encode_op; - - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->end - (char *)resp->p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; + if (op->status) { + if (op->opnum == OP_OPEN) + op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); - if (!cstate->current_fh.fh_dentry) { + if (!current_fh->fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export->ex_fslocs.migrated && !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } - if (opdesc->op_func) - op->status = opdesc->op_func(rqstp, cstate, &op->u); - else - BUG_ON(op->status == nfs_ok); + fh_clear_wcc(current_fh); + + /* If op is non-idempotent */ + if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + /* + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, plen); + } + + if (op->status) + goto encode_op; + + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); + op->status = opdesc->op_func(rqstp, cstate, &op->u); + + if (!op->status) { + if (opdesc->op_set_currentstateid) + opdesc->op_set_currentstateid(cstate, &op->u); + + if (opdesc->op_flags & OP_CLEAR_STATEID) + clear_current_stateid(cstate); + + if (need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(current_fh->fh_export, rqstp); + } encode_op: + /* Only from SEQUENCE */ + if (cstate->status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); status = op->status; } + + dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n", + args->ops, args->opcnt, resp->opcnt, op->opnum, + be32_to_cpu(status)); + if (cstate->replay_owner) { - nfs4_put_stateowner(cstate->replay_owner); + nfs4_unlock_state(); cstate->replay_owner = NULL; } /* XXX Ugh, we need to get rid of this kind of special case: */ @@ -944,160 +1402,546 @@ encode_op: nfsd4_increment_op_stats(op->opnum); } + cstate->status = status; + fh_put(current_fh); + fh_put(save_fh); + BUG_ON(cstate->replay_owner); out: - nfsd4_release_compoundargs(args); - cstate_free(cstate); + /* Reset deferral mechanism for RPC deferrals */ + rqstp->rq_usedeferral = 1; + dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } -static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { +#define op_encode_hdr_size (2) +#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define op_encode_change_info_maxsz (5) +#define nfs4_fattr_bitmap_maxsz (4) + +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) +#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) + +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) + +#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz) +#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \ + op_encode_ace_maxsz) + +#define op_encode_channel_attrs_maxsz (6 + 1 + 1) + +static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size) * sizeof(__be32); +} + +static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); +} + +static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFSD4_MAX_SEC_LABEL_LEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + +static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_lock_denied_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz + + op_encode_change_info_maxsz + 1 + + nfs4_fattr_bitmap_maxsz + + op_encode_delegation_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = op->u.read.rd_length; + + if (rlen > maxcount) + rlen = maxcount; + + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = svc_max_payload(rqstp); + u32 rlen = op->u.readdir.rd_maxcount; + + if (rlen > maxcount) + rlen = maxcount; + + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + op_encode_change_info_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return NFS4_MAX_SESSIONID_LEN + 20; +} + +static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); +} + +static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ + 1 + 1 + /* eir_flags, spr_how */\ + 4 + /* spo_must_enforce & _allow with bitmap */\ + 2 + /*eir_server_owner.so_minor_id */\ + /* eir_server_owner.so_major_id<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + /* eir_server_scope<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + 1 + /* eir_server_impl_id array length */\ + 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); +} + +static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ + 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); +} + +static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ + 2 + /* csr_sequence, csr_flags */\ + op_encode_channel_attrs_maxsz + \ + op_encode_channel_attrs_maxsz) * sizeof(__be32); +} + +static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, + .op_name = "OP_ACCESS", }, [OP_CLOSE] = { .op_func = (nfsd4op_func)nfsd4_close, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_CLOSE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid, }, [OP_COMMIT] = { .op_func = (nfsd4op_func)nfsd4_commit, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_COMMIT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize, }, [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID, + .op_name = "OP_CREATE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, }, [OP_DELEGRETURN] = { .op_func = (nfsd4op_func)nfsd4_delegreturn, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_DELEGRETURN", + .op_rsize_bop = nfsd4_only_status_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid, }, [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, + .op_name = "OP_GETATTR", }, [OP_GETFH] = { .op_func = (nfsd4op_func)nfsd4_getfh, + .op_name = "OP_GETFH", }, [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, + .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING + | OP_CACHEME, + .op_name = "OP_LINK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize, }, [OP_LOCK] = { .op_func = (nfsd4op_func)nfsd4_lock, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LOCK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, + .op_name = "OP_LOCKT", }, [OP_LOCKU] = { .op_func = (nfsd4op_func)nfsd4_locku, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LOCKU", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid, }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, + .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, + .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { .op_func = (nfsd4op_func)nfsd4_nverify, + .op_name = "OP_NVERIFY", }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid, }, [OP_OPEN_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_open_confirm, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, }, [OP_OPEN_DOWNGRADE] = { .op_func = (nfsd4op_func)nfsd4_open_downgrade, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_OPEN_DOWNGRADE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid, }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, + .op_name = "OP_PUTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { - /* unsupported; just for future reference: */ - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_func = (nfsd4op_func)nfsd4_putrootfh, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, + .op_name = "OP_PUTPUBFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, + .op_name = "OP_PUTROOTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, + .op_name = "OP_READ", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, + .op_name = "OP_READDIR", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, [OP_READLINK] = { .op_func = (nfsd4op_func)nfsd4_readlink, + .op_name = "OP_READLINK", }, [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_REMOVE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize, }, [OP_RENAME] = { .op_func = (nfsd4op_func)nfsd4_rename, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_RENAME", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize, }, [OP_RENEW] = { .op_func = (nfsd4op_func)nfsd4_renew, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, + .op_name = "OP_RENEW", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + .op_name = "OP_RESTOREFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, + .op_name = "OP_SAVEFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_SECINFO", }, [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, + .op_name = "OP_SETATTR", + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETCLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize, }, [OP_SETCLIENTID_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETCLIENTID_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, + .op_name = "OP_VERIFY", }, [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_WRITE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, + .op_name = "OP_RELEASE_LOCKOWNER", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + + /* NFSv4.1 operations */ + [OP_EXCHANGE_ID] = { + .op_func = (nfsd4op_func)nfsd4_exchange_id, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_EXCHANGE_ID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, + }, + [OP_BACKCHANNEL_CTL] = { + .op_func = (nfsd4op_func)nfsd4_backchannel_ctl, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_BACKCHANNEL_CTL", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_BIND_CONN_TO_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_BIND_CONN_TO_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize, + }, + [OP_CREATE_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_create_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_CREATE_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize, + }, + [OP_DESTROY_SESSION] = { + .op_func = (nfsd4op_func)nfsd4_destroy_session, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_DESTROY_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SEQUENCE] = { + .op_func = (nfsd4op_func)nfsd4_sequence, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_name = "OP_SEQUENCE", + }, + [OP_DESTROY_CLIENTID] = { + .op_func = (nfsd4op_func)nfsd4_destroy_clientid, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_DESTROY_CLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_RECLAIM_COMPLETE] = { + .op_func = (nfsd4op_func)nfsd4_reclaim_complete, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_RECLAIM_COMPLETE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SECINFO_NO_NAME] = { + .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_flags = OP_HANDLES_WRONGSEC, + .op_name = "OP_SECINFO_NO_NAME", + }, + [OP_TEST_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_test_stateid, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_TEST_STATEID", + }, + [OP_FREE_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_free_stateid, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_FREE_STATEID", + .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, }; -#define nfs4svc_decode_voidargs NULL -#define nfs4svc_release_void NULL +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + +static const char *nfsd4_op_name(unsigned opnum) +{ + if (opnum < ARRAY_SIZE(nfsd4_ops)) + return nfsd4_ops[opnum].op_name; + return "unknown_operation"; +} + #define nfsd4_voidres nfsd4_voidargs -#define nfs4svc_release_compound NULL struct nfsd4_voidargs { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd4_proc_##name, \ - (kxdrproc_t) nfs4svc_decode_##argt##args, \ - (kxdrproc_t) nfs4svc_encode_##rest##res, \ - (kxdrproc_t) nfs4svc_release_##relt, \ - sizeof(struct nfsd4_##argt##args), \ - sizeof(struct nfsd4_##rest##res), \ - 0, \ - cache, \ - respsize, \ - } - -/* - * TODO: At the present time, the NFSv4 server does not do XID caching - * of requests. Implementing XID caching would not be a serious problem, - * although it would require a mild change in interfaces since one - * doesn't know whether an NFSv4 request is idempotent until after the - * XDR decode. However, XID caching totally confuses pynfs (Peter - * Astrand's regression testsuite for NFSv4 servers), which reuses - * XID's liberally, so I've left it unimplemented until pynfs generates - * better XID's. - */ static struct svc_procedure nfsd_procedures4[2] = { - PROC(null, void, void, void, RC_NOCACHE, 1), - PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4) + [NFSPROC4_NULL] = { + .pc_func = (svc_procfunc) nfsd4_proc_null, + .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd4_voidargs), + .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 1, + }, + [NFSPROC4_COMPOUND] = { + .pc_func = (svc_procfunc) nfsd4_proc_compound, + .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs, + .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, + .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_release = nfsd4_release_compoundargs, + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = NFSD_BUFSIZE/4, + }, }; struct svc_version nfsd_version4 = { @@ -1106,6 +1950,7 @@ struct svc_version nfsd_version4 = { .vs_proc = nfsd_procedures4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = 1, }; /* diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1ff90625860..9c271f42604 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1,7 +1,6 @@ /* -* linux/fs/nfsd/nfs4recover.c -* * Copyright (c) 2004 The Regents of the University of Michigan. +* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com> * All rights reserved. * * Andy Adamson <andros@citi.umich.edu> @@ -33,40 +32,58 @@ * */ -#include <linux/err.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfs4.h> -#include <linux/nfsd/state.h> -#include <linux/nfsd/xdr4.h> -#include <linux/param.h> #include <linux/file.h> +#include <linux/slab.h> #include <linux/namei.h> -#include <asm/uaccess.h> -#include <linux/scatterlist.h> #include <linux/crypto.h> #include <linux/sched.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <net/net_namespace.h> +#include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/cld.h> + +#include "nfsd.h" +#include "state.h" +#include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC +/* Declarations */ +struct nfsd4_client_tracking_ops { + int (*init)(struct net *); + void (*exit)(struct net *); + void (*create)(struct nfs4_client *); + void (*remove)(struct nfs4_client *); + int (*check)(struct nfs4_client *); + void (*grace_done)(struct nfsd_net *, time_t); +}; + /* Globals */ -static struct nameidata rec_dir; -static int rec_dir_init = 0; +static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; -static void -nfs4_save_user(uid_t *saveuid, gid_t *savegid) +static int +nfs4_save_creds(const struct cred **original_creds) { - *saveuid = current->fsuid; - *savegid = current->fsgid; - current->fsuid = 0; - current->fsgid = 0; + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = GLOBAL_ROOT_UID; + new->fsgid = GLOBAL_ROOT_GID; + *original_creds = override_creds(new); + put_cred(new); + return 0; } static void -nfs4_reset_user(uid_t saveuid, gid_t savegid) +nfs4_reset_creds(const struct cred *original) { - current->fsuid = saveuid; - current->fsgid = savegid; + revert_creds(original); } static void @@ -83,300 +100,344 @@ md5_to_hex(char *out, char *md5) *out = '\0'; } -__be32 -nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) +static int +nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; struct scatterlist sg; - __be32 status = nfserr_resource; + int status; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(desc.tfm)) + if (IS_ERR(desc.tfm)) { + status = PTR_ERR(desc.tfm); goto out_no_tfm; + } + cksum.len = crypto_hash_digestsize(desc.tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) + if (cksum.data == NULL) { + status = -ENOMEM; goto out; + } sg_init_one(&sg, clname->data, clname->len); - if (crypto_hash_digest(&desc, &sg, sg.length, cksum.data)) + status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data); + if (status) goto out; md5_to_hex(dname, cksum.data); - kfree(cksum.data); - status = nfs_ok; + status = 0; out: + kfree(cksum.data); crypto_free_hash(desc.tfm); out_no_tfm: return status; } +/* + * If we had an error generating the recdir name for the legacy tracker + * then warn the admin. If the error doesn't appear to be transient, + * then disable recovery tracking. + */ static void -nfsd4_sync_rec_dir(void) +legacy_recdir_name_error(struct nfs4_client *clp, int error) { - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); - nfsd_sync_dir(rec_dir.path.dentry); - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + printk(KERN_ERR "NFSD: unable to generate recoverydir " + "name (%d).\n", error); + + /* + * if the algorithm just doesn't exist, then disable the recovery + * tracker altogether. The crypto libs will generally return this if + * FIPS is enabled as well. + */ + if (error == -ENOENT) { + printk(KERN_ERR "NFSD: disabling legacy clientid tracking. " + "Reboot recovery will not function correctly!\n"); + nfsd4_client_tracking_exit(clp->net); + } } -int +static void nfsd4_create_clid_dir(struct nfs4_client *clp) { - char *dname = clp->cl_recdir; - struct dentry *dentry; - uid_t uid; - gid_t gid; + const struct cred *original_cred; + char dname[HEXDIR_LEN]; + struct dentry *dir, *dentry; + struct nfs4_client_reclaim *crp; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); + if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + if (!nn->rec_file) + return; - if (!rec_dir_init || clp->cl_firststate) - return 0; + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(clp, status); - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return; + status = mnt_want_write_file(nn->rec_file); + if (status) + return; + + dir = nn->rec_file->f_path.dentry; /* lock the parent */ - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_lock(&dir->d_inode->i_mutex); - dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); + dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; } - status = -EEXIST; - if (dentry->d_inode) { - dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); + if (dentry->d_inode) + /* + * In the 4.1 case, where we're called from + * reclaim_complete(), records from the previous reboot + * may still be left, so this is OK. + * + * In the 4.0 case, we should never get here; but we may + * as well be forgiving and just succeed silently. + */ goto out_put; - } - status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); + status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); out_put: dput(dentry); out_unlock: - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_unlock(&dir->d_inode->i_mutex); if (status == 0) { - clp->cl_firststate = 1; - nfsd4_sync_rec_dir(); + if (nn->in_grace) { + crp = nfs4_client_to_reclaim(dname, nn); + if (crp) + crp->cr_clp = clp; + } + vfs_fsync(nn->rec_file, 0); + } else { + printk(KERN_ERR "NFSD: failed to write recovery record" + " (err %d); please check that %s exists" + " and is writeable", status, + user_recovery_dirname); } - nfs4_reset_user(uid, gid); - dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); - return status; + mnt_drop_write_file(nn->rec_file); + nfs4_reset_creds(original_cred); } -typedef int (recdir_func)(struct dentry *, struct dentry *); +typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *); -struct dentry_list { - struct dentry *dentry; +struct name_list { + char name[HEXDIR_LEN]; struct list_head list; }; -struct dentry_list_arg { - struct list_head dentries; - struct dentry *parent; +struct nfs4_dir_ctx { + struct dir_context ctx; + struct list_head names; }; static int -nfsd4_build_dentrylist(void *arg, const char *name, int namlen, +nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct dentry_list_arg *dla = arg; - struct list_head *dentries = &dla->dentries; - struct dentry *parent = dla->parent; - struct dentry *dentry; - struct dentry_list *child; + struct nfs4_dir_ctx *ctx = arg; + struct name_list *entry; - if (name && isdotent(name, namlen)) + if (namlen != HEXDIR_LEN - 1) return 0; - dentry = lookup_one_len(name, parent, namlen); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - child = kmalloc(sizeof(*child), GFP_KERNEL); - if (child == NULL) + entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); + if (entry == NULL) return -ENOMEM; - child->dentry = dentry; - list_add(&child->list, dentries); + memcpy(entry->name, name, HEXDIR_LEN - 1); + entry->name[HEXDIR_LEN - 1] = '\0'; + list_add(&entry->list, &ctx->names); return 0; } static int -nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) +nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { - struct file *filp; - struct dentry_list_arg dla = { - .parent = dir, + const struct cred *original_cred; + struct dentry *dir = nn->rec_file->f_path.dentry; + struct nfs4_dir_ctx ctx = { + .ctx.actor = nfsd4_build_namelist, + .names = LIST_HEAD_INIT(ctx.names) }; - struct list_head *dentries = &dla.dentries; - struct dentry_list *child; - uid_t uid; - gid_t gid; int status; - if (!rec_dir_init) - return 0; - - nfs4_save_user(&uid, &gid); - - filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); - status = PTR_ERR(filp); - if (IS_ERR(filp)) - goto out; - INIT_LIST_HEAD(dentries); - status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); - fput(filp); - while (!list_empty(dentries)) { - child = list_entry(dentries->next, struct dentry_list, list); - status = f(dir, child->dentry); - if (status) - goto out; - list_del(&child->list); - dput(child->dentry); - kfree(child); - } -out: - while (!list_empty(dentries)) { - child = list_entry(dentries->next, struct dentry_list, list); - list_del(&child->list); - dput(child->dentry); - kfree(child); - } - nfs4_reset_user(uid, gid); - return status; -} - -static int -nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) -{ - int status; + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; - if (!S_ISREG(dir->d_inode->i_mode)) { - printk("nfsd4: non-file found in client recovery directory\n"); - return -EINVAL; + status = vfs_llseek(nn->rec_file, 0, SEEK_SET); + if (status < 0) { + nfs4_reset_creds(original_cred); + return status; } - mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_unlink(dir->d_inode, dentry); - mutex_unlock(&dir->d_inode->i_mutex); - return status; -} -static int -nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) -{ - int status; - - /* For now this directory should already be empty, but we empty it of - * any regular files anyway, just in case the directory was created by - * a kernel from the future.... */ - nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); + status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - status = vfs_rmdir(dir->d_inode, dentry); + while (!list_empty(&ctx.names)) { + struct name_list *entry; + entry = list_entry(ctx.names.next, struct name_list, list); + if (!status) { + struct dentry *dentry; + dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + break; + } + status = f(dir, dentry, nn); + dput(dentry); + } + list_del(&entry->list); + kfree(entry); + } mutex_unlock(&dir->d_inode->i_mutex); + nfs4_reset_creds(original_cred); return status; } static int -nfsd4_unlink_clid_dir(char *name, int namlen) +nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) { - struct dentry *dentry; + struct dentry *dir, *dentry; int status; dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); - dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + dir = nn->rec_file->f_path.dentry; + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, dir, namlen); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); - return status; + goto out_unlock; } status = -ENOENT; if (!dentry->d_inode) goto out; - - status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); + status = vfs_rmdir(dir->d_inode, dentry); out: dput(dentry); +out_unlock: + mutex_unlock(&dir->d_inode->i_mutex); return status; } -void +static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { - uid_t uid; - gid_t gid; + const struct cred *original_cred; + struct nfs4_client_reclaim *crp; + char dname[HEXDIR_LEN]; int status; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!rec_dir_init || !clp->cl_firststate) + if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - clp->cl_firststate = 0; - nfs4_save_user(&uid, &gid); - status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); - nfs4_reset_user(uid, gid); - if (status == 0) - nfsd4_sync_rec_dir(); + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) + return legacy_recdir_name_error(clp, status); + + status = mnt_want_write_file(nn->rec_file); + if (status) + goto out; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + + status = nfs4_save_creds(&original_cred); + if (status < 0) + goto out_drop_write; + + status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); + nfs4_reset_creds(original_cred); + if (status == 0) { + vfs_fsync(nn->rec_file, 0); + if (nn->in_grace) { + /* remove reclaim record */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) + nfs4_remove_reclaim_record(crp, nn); + } + } +out_drop_write: + mnt_drop_write_file(nn->rec_file); +out: if (status) printk("NFSD: Failed to remove expired client state directory" - " %.*s\n", HEXDIR_LEN, clp->cl_recdir); - return; + " %.*s\n", HEXDIR_LEN, dname); } static int -purge_old(struct dentry *parent, struct dentry *child) +purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { int status; - if (nfs4_has_reclaimed_state(child->d_name.name)) + if (nfs4_has_reclaimed_state(child->d_name.name, nn)) return 0; - status = nfsd4_clear_clid_dir(parent, child); + status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %s\n", - child->d_name.name); + printk("failed to remove client recovery directory %pd\n", + child); /* Keep trying, success or failure: */ return 0; } -void -nfsd4_recdir_purge_old(void) { +static void +nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) +{ int status; - if (!rec_dir_init) + nn->in_grace = false; + if (!nn->rec_file) return; - status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); + status = mnt_want_write_file(nn->rec_file); + if (status) + goto out; + status = nfsd4_list_rec_dir(purge_old, nn); if (status == 0) - nfsd4_sync_rec_dir(); + vfs_fsync(nn->rec_file, 0); + mnt_drop_write_file(nn->rec_file); +out: + nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_dir.path.dentry->d_name.name); - return; + " directory %pD\n", nn->rec_file); } static int -load_recdir(struct dentry *parent, struct dentry *child) +load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %s in recovery directory\n", - child->d_name.name); + printk("nfsd4: illegal name %pd in recovery directory\n", + child); /* Keep trying; maybe the others are OK: */ return 0; } - nfs4_client_to_reclaim(child->d_name.name); + nfs4_client_to_reclaim(child->d_name.name, nn); return 0; } -int -nfsd4_recdir_load(void) { +static int +nfsd4_recdir_load(struct net *net) { int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); + if (!nn->rec_file) + return 0; + + status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_dir.path.dentry->d_name.name); + " directory %pD\n", nn->rec_file); return status; } @@ -384,36 +445,974 @@ nfsd4_recdir_load(void) { * Hold reference to the recovery directory. */ -void -nfsd4_init_recdir(char *rec_dirname) +static int +nfsd4_init_recdir(struct net *net) { - uid_t uid = 0; - gid_t gid = 0; - int status; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + const struct cred *original_cred; + int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", - rec_dirname); + user_recovery_dirname); - BUG_ON(rec_dir_init); + BUG_ON(nn->rec_file); - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) { + printk("NFSD: Unable to change credentials to find recovery" + " directory: error %d\n", + status); + return status; + } - status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, - &rec_dir); - if (status) + nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); + if (IS_ERR(nn->rec_file)) { printk("NFSD: unable to find recovery directory %s\n", - rec_dirname); + user_recovery_dirname); + status = PTR_ERR(nn->rec_file); + nn->rec_file = NULL; + } + nfs4_reset_creds(original_cred); if (!status) - rec_dir_init = 1; - nfs4_reset_user(uid, gid); + nn->in_grace = true; + return status; } -void -nfsd4_shutdown_recdir(void) + +static int +nfs4_legacy_state_init(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; + + nn->reclaim_str_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->reclaim_str_hashtbl) + return -ENOMEM; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]); + nn->reclaim_str_hashtbl_size = 0; + + return 0; +} + +static void +nfs4_legacy_state_shutdown(struct net *net) { - if (!rec_dir_init) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + kfree(nn->reclaim_str_hashtbl); +} + +static int +nfsd4_load_reboot_recovery_data(struct net *net) +{ + int status; + + status = nfsd4_init_recdir(net); + if (!status) + status = nfsd4_recdir_load(net); + if (status) + printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; +} + +static int +nfsd4_legacy_tracking_init(struct net *net) +{ + int status; + + /* XXX: The legacy code won't work in a container */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " + "tracking in a container!\n"); + return -EINVAL; + } + + status = nfs4_legacy_state_init(net); + if (status) + return status; + + status = nfsd4_load_reboot_recovery_data(net); + if (status) + goto err; + return 0; + +err: + nfs4_legacy_state_shutdown(net); + return status; +} + +static void +nfsd4_shutdown_recdir(struct nfsd_net *nn) +{ + if (!nn->rec_file) return; - rec_dir_init = 0; - path_put(&rec_dir.path); + fput(nn->rec_file); + nn->rec_file = NULL; +} + +static void +nfsd4_legacy_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_release_reclaim(nn); + nfsd4_shutdown_recdir(nn); + nfs4_legacy_state_shutdown(net); +} + +/* + * Change the NFSv4 recovery directory to recdir. + */ +int +nfs4_reset_recoverydir(char *recdir) +{ + int status; + struct path path; + + status = kern_path(recdir, LOOKUP_FOLLOW, &path); + if (status) + return status; + status = -ENOTDIR; + if (S_ISDIR(path.dentry->d_inode->i_mode)) { + strcpy(user_recovery_dirname, recdir); + status = 0; + } + path_put(&path); + return status; +} + +char * +nfs4_recoverydir(void) +{ + return user_recovery_dirname; +} + +static int +nfsd4_check_legacy_client(struct nfs4_client *clp) +{ + int status; + char dname[HEXDIR_LEN]; + struct nfs4_client_reclaim *crp; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + /* did we already find that this client is stable? */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + status = nfs4_make_rec_clidname(dname, &clp->cl_name); + if (status) { + legacy_recdir_name_error(clp, status); + return status; + } + + /* look for it in the reclaim hashtable otherwise */ + crp = nfsd4_find_reclaim_client(dname, nn); + if (crp) { + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + crp->cr_clp = clp; + return 0; + } + + return -ENOENT; +} + +static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { + .init = nfsd4_legacy_tracking_init, + .exit = nfsd4_legacy_tracking_exit, + .create = nfsd4_create_clid_dir, + .remove = nfsd4_remove_clid_dir, + .check = nfsd4_check_legacy_client, + .grace_done = nfsd4_recdir_purge_old, +}; + +/* Globals */ +#define NFSD_PIPE_DIR "nfsd" +#define NFSD_CLD_PIPE "cld" + +/* per-net-ns structure for holding cld upcall info */ +struct cld_net { + struct rpc_pipe *cn_pipe; + spinlock_t cn_lock; + struct list_head cn_list; + unsigned int cn_xid; +}; + +struct cld_upcall { + struct list_head cu_list; + struct cld_net *cu_net; + struct task_struct *cu_task; + struct cld_msg cu_msg; +}; + +static int +__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + struct rpc_pipe_msg msg; + + memset(&msg, 0, sizeof(msg)); + msg.data = cmsg; + msg.len = sizeof(*cmsg); + + /* + * Set task state before we queue the upcall. That prevents + * wake_up_process in the downcall from racing with schedule. + */ + set_current_state(TASK_UNINTERRUPTIBLE); + ret = rpc_queue_upcall(pipe, &msg); + if (ret < 0) { + set_current_state(TASK_RUNNING); + goto out; + } + + schedule(); + set_current_state(TASK_RUNNING); + + if (msg.errno < 0) + ret = msg.errno; +out: + return ret; +} + +static int +cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + + /* + * -EAGAIN occurs when pipe is closed and reopened while there are + * upcalls queued. + */ + do { + ret = __cld_pipe_upcall(pipe, cmsg); + } while (ret == -EAGAIN); + + return ret; +} + +static ssize_t +cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) +{ + struct cld_upcall *tmp, *cup; + struct cld_msg __user *cmsg = (struct cld_msg __user *)src; + uint32_t xid; + struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + if (mlen != sizeof(*cmsg)) { + dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, + sizeof(*cmsg)); + return -EINVAL; + } + + /* copy just the xid so we can try to find that */ + if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) { + dprintk("%s: error when copying xid from userspace", __func__); + return -EFAULT; + } + + /* walk the list and find corresponding xid */ + cup = NULL; + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { + cup = tmp; + list_del_init(&cup->cu_list); + break; + } + } + spin_unlock(&cn->cn_lock); + + /* couldn't find upcall? */ + if (!cup) { + dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid); + return -EINVAL; + } + + if (copy_from_user(&cup->cu_msg, src, mlen) != 0) + return -EFAULT; + + wake_up_process(cup->cu_task); + return mlen; +} + +static void +cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct cld_msg *cmsg = msg->data; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, + cu_msg); + + /* errno >= 0 means we got a downcall */ + if (msg->errno >= 0) + return; + + wake_up_process(cup->cu_task); +} + +static const struct rpc_pipe_ops cld_upcall_ops = { + .upcall = rpc_pipe_generic_upcall, + .downcall = cld_pipe_downcall, + .destroy_msg = cld_pipe_destroy_msg, +}; + +static struct dentry * +nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe) +{ + struct dentry *dir, *dentry; + + dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR); + if (dir == NULL) + return ERR_PTR(-ENOENT); + dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); + dput(dir); + return dentry; +} + +static void +nfsd4_cld_unregister_sb(struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static struct dentry * +nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + struct dentry *dentry; + + sb = rpc_get_sb_net(net); + if (!sb) + return NULL; + dentry = nfsd4_cld_register_sb(sb, pipe); + rpc_put_sb_net(net); + return dentry; +} + +static void +nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + + sb = rpc_get_sb_net(net); + if (sb) { + nfsd4_cld_unregister_sb(pipe); + rpc_put_sb_net(net); + } +} + +/* Initialize rpc_pipefs pipe for communication with client tracking daemon */ +static int +nfsd4_init_cld_pipe(struct net *net) +{ + int ret; + struct dentry *dentry; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn; + + if (nn->cld_net) + return 0; + + cn = kzalloc(sizeof(*cn), GFP_KERNEL); + if (!cn) { + ret = -ENOMEM; + goto err; + } + + cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(cn->cn_pipe)) { + ret = PTR_ERR(cn->cn_pipe); + goto err; + } + spin_lock_init(&cn->cn_lock); + INIT_LIST_HEAD(&cn->cn_list); + + dentry = nfsd4_cld_register_net(net, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err_destroy_data; + } + + cn->cn_pipe->dentry = dentry; + nn->cld_net = cn; + return 0; + +err_destroy_data: + rpc_destroy_pipe_data(cn->cn_pipe); +err: + kfree(cn); + printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n", + ret); + return ret; +} + +static void +nfsd4_remove_cld_pipe(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + nfsd4_cld_unregister_net(net, cn->cn_pipe); + rpc_destroy_pipe_data(cn->cn_pipe); + kfree(nn->cld_net); + nn->cld_net = NULL; +} + +static struct cld_upcall * +alloc_cld_upcall(struct cld_net *cn) +{ + struct cld_upcall *new, *tmp; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return new; + + /* FIXME: hard cap on number in flight? */ +restart_search: + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (tmp->cu_msg.cm_xid == cn->cn_xid) { + cn->cn_xid++; + spin_unlock(&cn->cn_lock); + goto restart_search; + } + } + new->cu_task = current; + new->cu_msg.cm_vers = CLD_UPCALL_VERSION; + put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); + new->cu_net = cn; + list_add(&new->cu_list, &cn->cn_list); + spin_unlock(&cn->cn_lock); + + dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid); + + return new; +} + +static void +free_cld_upcall(struct cld_upcall *victim) +{ + struct cld_net *cn = victim->cu_net; + + spin_lock(&cn->cn_lock); + list_del(&victim->cu_list); + spin_unlock(&cn->cn_lock); + kfree(victim); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_create(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already stored */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Create; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to create client " + "record on stable storage: %d\n", ret); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_remove(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already removed */ + if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Remove; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to remove client " + "record from stable storage: %d\n", ret); +} + +/* Check for presence of a record, and update its timestamp */ +static int +nfsd4_cld_check(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if one was already stored during this grace pd */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + cup = alloc_cld_upcall(cn); + if (!cup) { + printk(KERN_ERR "NFSD: Unable to check client record on " + "stable storage: %d\n", -ENOMEM); + return -ENOMEM; + } + + cup->cu_msg.cm_cmd = Cld_Check; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); + return ret; +} + +static void +nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) +{ + int ret; + struct cld_upcall *cup; + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceDone; + cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); +} + +static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { + .init = nfsd4_init_cld_pipe, + .exit = nfsd4_remove_cld_pipe, + .create = nfsd4_cld_create, + .remove = nfsd4_cld_remove, + .check = nfsd4_cld_check, + .grace_done = nfsd4_cld_grace_done, +}; + +/* upcall via usermodehelper */ +static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack"; +module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog), + S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program"); + +static bool cltrack_legacy_disable; +module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(cltrack_legacy_disable, + "Disable legacy recoverydir conversion. Default: false"); + +#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" +#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" + +static char * +nfsd4_cltrack_legacy_topdir(void) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + len = strlen(LEGACY_TOPDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s", + nfs4_recoverydir()); + if (copied >= len) { + /* just return nothing if output was truncated */ + kfree(result); + return NULL; + } + + return result; +} + +static char * +nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) +{ + int copied; + size_t len; + char *result; + + if (cltrack_legacy_disable) + return NULL; + + /* +1 is for '/' between "topdir" and "recdir" */ + len = strlen(LEGACY_RECDIR_ENV_PREFIX) + + strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN; + + result = kmalloc(len, GFP_KERNEL); + if (!result) + return result; + + copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/", + nfs4_recoverydir()); + if (copied > (len - HEXDIR_LEN)) { + /* just return nothing if output will be truncated */ + kfree(result); + return NULL; + } + + copied = nfs4_make_rec_clidname(result + copied, name); + if (copied) { + kfree(result); + return NULL; + } + + return result; +} + +static int +nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) +{ + char *envp[2]; + char *argv[4]; + int ret; + + if (unlikely(!cltrack_prog[0])) { + dprintk("%s: cltrack_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s: cmd: %s\n", __func__, cmd); + dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); + dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); + + envp[0] = legacy; + envp[1] = NULL; + + argv[0] = (char *)cltrack_prog; + argv[1] = cmd; + argv[2] = arg; + argv[3] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or EACCES + * error. The admin can re-enable it on the fly by using sysfs + * once the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + dprintk("NFSD: %s was not found or isn't executable (%d). " + "Setting cltrack_prog to blank string!", + cltrack_prog, ret); + cltrack_prog[0] = '\0'; + } + dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret); + + return ret; +} + +static char * +bin_to_hex_dup(const unsigned char *src, int srclen) +{ + int i; + char *buf, *hex; + + /* +1 for terminating NULL */ + buf = kmalloc((srclen * 2) + 1, GFP_KERNEL); + if (!buf) + return buf; + + hex = buf; + for (i = 0; i < srclen; i++) { + sprintf(hex, "%2.2x", *src++); + hex += 2; + } + return buf; +} + +static int +nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) +{ + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } + return nfsd4_umh_cltrack_upcall("init", NULL, NULL); +} + +static void +nfsd4_umh_cltrack_create(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("create", hexid, NULL); + kfree(hexid); +} + +static void +nfsd4_umh_cltrack_remove(struct nfs4_client *clp) +{ + char *hexid; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return; + } + nfsd4_umh_cltrack_upcall("remove", hexid, NULL); + kfree(hexid); +} + +static int +nfsd4_umh_cltrack_check(struct nfs4_client *clp) +{ + int ret; + char *hexid, *legacy; + + hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); + if (!hexid) { + dprintk("%s: can't allocate memory for upcall!\n", __func__); + return -ENOMEM; + } + legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); + ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); + kfree(legacy); + kfree(hexid); + return ret; +} + +static void +nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, + time_t boot_time) +{ + char *legacy; + char timestr[22]; /* FIXME: better way to determine max size? */ + + sprintf(timestr, "%ld", boot_time); + legacy = nfsd4_cltrack_legacy_topdir(); + nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); + kfree(legacy); +} + +static struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = { + .init = nfsd4_umh_cltrack_init, + .exit = NULL, + .create = nfsd4_umh_cltrack_create, + .remove = nfsd4_umh_cltrack_remove, + .check = nfsd4_umh_cltrack_check, + .grace_done = nfsd4_umh_cltrack_grace_done, +}; + +int +nfsd4_client_tracking_init(struct net *net) +{ + int status; + struct path path; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + /* just run the init if it the method is already decided */ + if (nn->client_tracking_ops) + goto do_init; + + /* + * First, try a UMH upcall. It should succeed or fail quickly, so + * there's little harm in trying that first. + */ + nn->client_tracking_ops = &nfsd4_umh_tracking_ops; + status = nn->client_tracking_ops->init(net); + if (!status) + return status; + + /* + * See if the recoverydir exists and is a directory. If it is, + * then use the legacy ops. + */ + nn->client_tracking_ops = &nfsd4_legacy_tracking_ops; + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); + if (!status) { + status = S_ISDIR(path.dentry->d_inode->i_mode); + path_put(&path); + if (status) + goto do_init; + } + + /* Finally, try to use nfsdcld */ + nn->client_tracking_ops = &nfsd4_cld_tracking_ops; + printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be " + "removed in 3.10. Please transition to using " + "nfsdcltrack.\n"); +do_init: + status = nn->client_tracking_ops->init(net); + if (status) { + printk(KERN_WARNING "NFSD: Unable to initialize client " + "recovery tracking! (%d)\n", status); + nn->client_tracking_ops = NULL; + } + return status; +} + +void +nfsd4_client_tracking_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->client_tracking_ops) { + if (nn->client_tracking_ops->exit) + nn->client_tracking_ops->exit(net); + nn->client_tracking_ops = NULL; + } +} + +void +nfsd4_client_record_create(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->create(clp); +} + +void +nfsd4_client_record_remove(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + nn->client_tracking_ops->remove(clp); +} + +int +nfsd4_client_record_check(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (nn->client_tracking_ops) + return nn->client_tracking_ops->check(clp); + + return -EOPNOTSUPP; +} + +void +nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) +{ + if (nn->client_tracking_ops) + nn->client_tracking_ops->grace_done(nn, boot_time); +} + +static int +rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + struct dentry *dentry; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + if (!cn) { + module_put(THIS_MODULE); + return 0; + } + + switch (event) { + case RPC_PIPEFS_MOUNT: + dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + break; + } + cn->cn_pipe->dentry = dentry; + break; + case RPC_PIPEFS_UMOUNT: + if (cn->cn_pipe->dentry) + nfsd4_cld_unregister_sb(cn->cn_pipe); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfsd4_cld_block = { + .notifier_call = rpc_pipefs_event, +}; + +int +register_cld_notifier(void) +{ + return rpc_pipefs_notifier_register(&nfsd4_cld_block); +} + +void +unregister_cld_notifier(void) +{ + rpc_pipefs_notifier_unregister(&nfsd4_cld_block); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bcb97d8e8b8..2204e1fe572 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1,6 +1,4 @@ /* -* linux/fs/nfsd/nfs4state.c -* * Copyright (c) 2001 The Regents of the University of Michigan. * All rights reserved. * @@ -34,62 +32,63 @@ * */ -#include <linux/param.h> -#include <linux/major.h> +#include <linux/file.h> +#include <linux/fs.h> #include <linux/slab.h> - -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/mount.h> -#include <linux/workqueue.h> -#include <linux/smp_lock.h> -#include <linux/kthread.h> -#include <linux/nfs4.h> -#include <linux/nfsd/state.h> -#include <linux/nfsd/xdr4.h> #include <linux/namei.h> #include <linux/swap.h> -#include <linux/mutex.h> -#include <linux/lockd/bind.h> -#include <linux/module.h> +#include <linux/pagemap.h> +#include <linux/ratelimit.h> +#include <linux/sunrpc/svcauth_gss.h> +#include <linux/sunrpc/addr.h> +#include <linux/hash.h> +#include "xdr4.h" +#include "xdr4cb.h" +#include "vfs.h" +#include "current_stateid.h" + +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -/* Globals */ -static time_t lease_time = 90; /* default lease time */ -static time_t user_lease_time = 90; -static time_t boot_time; -static int in_grace = 1; -static u32 current_ownerid = 1; -static u32 current_fileid = 1; -static u32 current_delegid = 1; -static u32 nfs4_init; -static stateid_t zerostateid; /* bits all 0 */ -static stateid_t onestateid; /* bits all 1 */ - -#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) -#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) +#define all_ones {{~0,~0},~0} +static const stateid_t one_stateid = { + .si_generation = ~0, + .si_opaque = all_ones, +}; +static const stateid_t zero_stateid = { + /* all fields zero */ +}; +static const stateid_t currentstateid = { + .si_generation = 1, +}; + +static u64 current_sessionid = 1; + +#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) +#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) +#define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) /* forward declarations */ -static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); -static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); -static void release_stateid_lockowners(struct nfs4_stateid *open_stp); -static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; -static void nfs4_set_recdir(char *recdir); +static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); -/* Locking: - * - * client_mutex: - * protects clientid_hashtbl[], clientstr_hashtbl[], - * unconfstr_hashtbl[], uncofid_hashtbl[]. - */ +/* Locking: */ + +/* Currently used for almost all code touching nfsv4 state: */ static DEFINE_MUTEX(client_mutex); -static struct kmem_cache *stateowner_slab = NULL; -static struct kmem_cache *file_slab = NULL; -static struct kmem_cache *stateid_slab = NULL; -static struct kmem_cache *deleg_slab = NULL; +/* + * Currently used for the del_recall_lru and file hash table. In an + * effort to decrease the scope of the client_mutex, this spinlock may + * eventually cover more: + */ +static DEFINE_SPINLOCK(state_lock); + +static struct kmem_cache *openowner_slab; +static struct kmem_cache *lockowner_slab; +static struct kmem_cache *file_slab; +static struct kmem_cache *stateid_slab; +static struct kmem_cache *deleg_slab; void nfs4_lock_state(void) @@ -97,12 +96,125 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } +static void free_session(struct nfsd4_session *); + +static bool is_session_dead(struct nfsd4_session *ses) +{ + return ses->se_flags & NFS4_SESSION_DEAD; +} + +void nfsd4_put_session(struct nfsd4_session *ses) +{ + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); +} + +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) +{ + if (atomic_read(&ses->se_ref) > ref_held_by_me) + return nfserr_jukebox; + ses->se_flags |= NFS4_SESSION_DEAD; + return nfs_ok; +} + +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) +{ + if (is_session_dead(ses)) + return nfserr_badsession; + atomic_inc(&ses->se_ref); + return nfs_ok; +} + void nfs4_unlock_state(void) { mutex_unlock(&client_mutex); } +static bool is_client_expired(struct nfs4_client *clp) +{ + return clp->cl_time == 0; +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + clp->cl_time = 0; + return nfs_ok; +} + +static __be32 mark_client_expired(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + __be32 ret; + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); + return ret; +} + +static __be32 get_client_locked(struct nfs4_client *clp) +{ + if (is_client_expired(clp)) + return nfserr_expired; + atomic_inc(&clp->cl_refcount); + return nfs_ok; +} + +/* must be called under the client_lock */ +static inline void +renew_client_locked(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (is_client_expired(clp)) { + WARN_ON(1); + printk("%s: client (clientid %08x/%08x) already expired\n", + __func__, + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + return; + } + + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &nn->client_lru); + clp->cl_time = get_seconds(); +} + +static inline void +renew_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static void put_client_renew_locked(struct nfs4_client *clp) +{ + if (!atomic_dec_and_test(&clp->cl_refcount)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); +} + +void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -116,221 +228,936 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -/* forward declarations */ -static void release_stateowner(struct nfs4_stateowner *sop); -static void release_stateid(struct nfs4_stateid *stp, int flags); - -/* - * Delegation state - */ - -/* recall_lock protects the del_recall_lru */ -static DEFINE_SPINLOCK(recall_lock); -static struct list_head del_recall_lru; - -static void -free_nfs4_file(struct kref *kref) +static void nfsd4_free_file(struct nfs4_file *f) { - struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref); - list_del(&fp->fi_hash); - iput(fp->fi_inode); - kmem_cache_free(file_slab, fp); + kmem_cache_free(file_slab, f); } static inline void put_nfs4_file(struct nfs4_file *fi) { - kref_put(&fi->fi_ref, free_nfs4_file); + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { + hlist_del(&fi->fi_hash); + spin_unlock(&state_lock); + iput(fi->fi_inode); + nfsd4_free_file(fi); + } } static inline void get_nfs4_file(struct nfs4_file *fi) { - kref_get(&fi->fi_ref); + atomic_inc(&fi->fi_ref); } static int num_delegations; -unsigned int max_delegations; +unsigned long max_delegations; /* * Open owner state (share locks) */ -/* hash tables for nfs4_stateowner */ +/* hash tables for lock and open owners */ #define OWNER_HASH_BITS 8 #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -#define ownerid_hashval(id) \ - ((id) & OWNER_HASH_MASK) -#define ownerstr_hashval(clientid, ownername) \ - (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK) +static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +{ + unsigned int ret; -static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE]; -static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; + ret = opaque_hashval(ownername->data, ownername->len); + ret += clientid; + return ret & OWNER_HASH_MASK; +} /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -#define FILE_HASH_MASK (FILE_HASH_SIZE - 1) -/* hash table for (open)nfs4_stateid */ -#define STATEID_HASH_BITS 10 -#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) -#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1) -#define file_hashval(x) \ - hash_ptr(x, FILE_HASH_BITS) -#define stateid_hashval(owner_id, file_id) \ - (((owner_id) + (file_id)) & STATEID_HASH_MASK) +static unsigned int file_hashval(struct inode *ino) +{ + /* XXX: why are we hashing on inode pointer, anyway? */ + return hash_ptr(ino, FILE_HASH_BITS); +} + +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + +static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); + atomic_inc(&fp->fi_access[oflag]); +} + +static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_get_access(fp, O_RDONLY); + __nfs4_file_get_access(fp, O_WRONLY); + } else + __nfs4_file_get_access(fp, oflag); +} + +static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +{ + if (fp->fi_fds[oflag]) { + fput(fp->fi_fds[oflag]); + fp->fi_fds[oflag] = NULL; + } +} + +static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (atomic_dec_and_test(&fp->fi_access[oflag])) { + nfs4_file_put_fd(fp, oflag); + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) + nfs4_file_put_fd(fp, O_RDWR); + } +} + +static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +{ + if (oflag == O_RDWR) { + __nfs4_file_put_access(fp, O_RDONLY); + __nfs4_file_put_access(fp, O_WRONLY); + } else + __nfs4_file_put_access(fp, oflag); +} + +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) +{ + struct idr *stateids = &cl->cl_stateids; + struct nfs4_stid *stid; + int new_id; + + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + if (new_id < 0) + goto out_free; + stid->sc_client = cl; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; + /* Will be incremented before return to client: */ + stid->sc_stateid.si_generation = 0; + + /* + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): + */ + return stid; +out_free: + kmem_cache_free(slab, stid); + return NULL; +} + +static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +{ + return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); +} + +/* + * When we recall a delegation, we should be careful not to hand it + * out again straight away. + * To ensure this we keep a pair of bloom filters ('new' and 'old') + * in which the filehandles of recalled delegations are "stored". + * If a filehandle appear in either filter, a delegation is blocked. + * When a delegation is recalled, the filehandle is stored in the "new" + * filter. + * Every 30 seconds we swap the filters and clear the "new" one, + * unless both are empty of course. + * + * Each filter is 256 bits. We hash the filehandle to 32bit and use the + * low 3 bytes as hash-table indices. + * + * 'state_lock', which is always held when block_delegations() is called, + * is used to manage concurrent access. Testing does not need the lock + * except when swapping the two filters. + */ +static struct bloom_pair { + int entries, old_entries; + time_t swap_time; + int new; /* index into 'set' */ + DECLARE_BITMAP(set[2], 256); +} blocked_delegations; + +static int delegation_blocked(struct knfsd_fh *fh) +{ + u32 hash; + struct bloom_pair *bd = &blocked_delegations; -static struct list_head file_hashtbl[FILE_HASH_SIZE]; -static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; + if (bd->entries == 0) + return 0; + if (seconds_since_boot() - bd->swap_time > 30) { + spin_lock(&state_lock); + if (seconds_since_boot() - bd->swap_time > 30) { + bd->entries -= bd->old_entries; + bd->old_entries = bd->entries; + memset(bd->set[bd->new], 0, + sizeof(bd->set[0])); + bd->new = 1-bd->new; + bd->swap_time = seconds_since_boot(); + } + spin_unlock(&state_lock); + } + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + if (test_bit(hash&255, bd->set[0]) && + test_bit((hash>>8)&255, bd->set[0]) && + test_bit((hash>>16)&255, bd->set[0])) + return 1; + + if (test_bit(hash&255, bd->set[1]) && + test_bit((hash>>8)&255, bd->set[1]) && + test_bit((hash>>16)&255, bd->set[1])) + return 1; + + return 0; +} + +static void block_delegations(struct knfsd_fh *fh) +{ + u32 hash; + struct bloom_pair *bd = &blocked_delegations; + + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + + __set_bit(hash&255, bd->set[bd->new]); + __set_bit((hash>>8)&255, bd->set[bd->new]); + __set_bit((hash>>16)&255, bd->set[bd->new]); + if (bd->entries == 0) + bd->swap_time = seconds_since_boot(); + bd->entries += 1; +} static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; - struct nfs4_file *fp = stp->st_file; - struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; dprintk("NFSD alloc_init_deleg\n"); - if (fp->fi_had_conflict) - return NULL; if (num_delegations > max_delegations) return NULL; - dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); + if (delegation_blocked(¤t_fh->fh_handle)) + return NULL; + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; + /* + * delegation seqid's are never incremented. The 4.1 special + * meaning of seqid 0 isn't meaningful, really, but let's avoid + * 0 anyway just for consistency and use 1: + */ + dp->dl_stid.sc_stateid.si_generation = 1; num_delegations++; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - dp->dl_client = clp; - get_nfs4_file(fp); - dp->dl_file = fp; - dp->dl_flock = NULL; - get_file(stp->st_vfs_file); - dp->dl_vfs_file = stp->st_vfs_file; - dp->dl_type = type; - dp->dl_recall.cbr_dp = NULL; - dp->dl_recall.cbr_ident = cb->cb_ident; - dp->dl_recall.cbr_trunc = 0; - dp->dl_stateid.si_boot = boot_time; - dp->dl_stateid.si_stateownerid = current_delegid++; - dp->dl_stateid.si_fileid = 0; - dp->dl_stateid.si_generation = 0; - dp->dl_fhlen = current_fh->fh_handle.fh_size; - memcpy(dp->dl_fhval, ¤t_fh->fh_handle.fh_base, - current_fh->fh_handle.fh_size); + dp->dl_file = NULL; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; + fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &clp->cl_delegations); + nfsd4_init_callback(&dp->dl_recall); return dp; } +static void remove_stid(struct nfs4_stid *s) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +} + +static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) +{ + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - dprintk("NFSD: freeing dp %p\n",dp); - put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; } } -/* Remove the associated file_lock first, then remove the delegation. - * lease_modify() is called to remove the FS_LEASE file_lock from - * the i_flock list, eventually calling nfsd's lock_manager - * fl_release_callback. - */ +static void nfs4_put_deleg_lease(struct nfs4_file *fp) +{ + if (!fp->fi_lease) + return; + if (atomic_dec_and_test(&fp->fi_delegees)) { + vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); + fp->fi_lease = NULL; + fput(fp->fi_deleg_file); + fp->fi_deleg_file = NULL; + } +} + +static void unhash_stid(struct nfs4_stid *s) +{ + s->sc_type = 0; +} + static void -nfs4_close_delegation(struct nfs4_delegation *dp) +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { - struct file *filp = dp->dl_vfs_file; + lockdep_assert_held(&state_lock); - dprintk("NFSD: close_delegation dp %p\n",dp); - dp->dl_vfs_file = NULL; - /* The following nfsd_close may not actually close the file, - * but we want to remove the lease in any case. */ - if (dp->dl_flock) - vfs_setlease(filp, F_UNLCK, &dp->dl_flock); - nfsd_close(filp); + dp->dl_stid.sc_type = NFS4_DELEG_STID; + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_perfile); + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); - spin_lock(&recall_lock); + list_del_init(&dp->dl_perfile); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&state_lock); + if (dp->dl_file) { + nfs4_put_deleg_lease(dp->dl_file); + put_nfs4_file(dp->dl_file); + dp->dl_file = NULL; + } +} + + + +static void destroy_revoked_delegation(struct nfs4_delegation *dp) +{ list_del_init(&dp->dl_recall_lru); - spin_unlock(&recall_lock); - nfs4_close_delegation(dp); + remove_stid(&dp->dl_stid); + nfs4_put_delegation(dp); +} + +static void destroy_delegation(struct nfs4_delegation *dp) +{ + unhash_delegation(dp); + remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } +static void revoke_delegation(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (clp->cl_minorversion == 0) + destroy_delegation(dp); + else { + unhash_delegation(dp); + dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + } +} + /* * SETCLIENTID state */ -/* Hash tables for nfs4_clientid state */ -#define CLIENT_HASH_BITS 4 -#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) -#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) +static unsigned int clientid_hashval(u32 id) +{ + return id & CLIENT_HASH_MASK; +} + +static unsigned int clientstr_hashval(const char *name) +{ + return opaque_hashval(name, 8) & CLIENT_HASH_MASK; +} -#define clientid_hashval(id) \ - ((id) & CLIENT_HASH_MASK) -#define clientstr_hashval(name) \ - (opaque_hashval((name), 8) & CLIENT_HASH_MASK) /* - * reclaim_str_hashtbl[] holds known client info from previous reset/reboot - * used in reboot/reset lease grace period processing - * - * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed - * setclientid_confirmed info. + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation of rfc 3530 14.2.19 that the server + * return an error if the client attempt to downgrade to a combination + * of share bits not explicable by closing some of its previous opens. * - * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed - * setclientid info. + * XXX: This enforcement is actually incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: * - * client_lru holds client queue ordered by nfs4_client.cl_time - * for lease renewal. + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none * - * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time - * for last close replay. + * which we should reject. */ -static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; -static int reclaim_str_hashtbl_size = 0; -static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; -static struct list_head client_lru; -static struct list_head close_lru; +static unsigned int +bmap_to_share_mode(unsigned long bmap) { + int i; + unsigned int access = 0; + + for (i = 1; i < 4; i++) { + if (test_bit(i, &bmap)) + access |= i; + } + return access; +} + +static bool +test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { + unsigned int access, deny; + + access = bmap_to_share_mode(stp->st_access_bmap); + deny = bmap_to_share_mode(stp->st_deny_bmap); + if ((access & open->op_share_deny) || (deny & open->op_share_access)) + return false; + return true; +} +/* set share access for a given stateid */ static inline void -renew_client(struct nfs4_client *clp) +set_access(u32 access, struct nfs4_ol_stateid *stp) { - /* - * Move client to the end to the LRU list. - */ - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); - list_move_tail(&clp->cl_lru, &client_lru); - clp->cl_time = get_seconds(); + __set_bit(access, &stp->st_access_bmap); +} + +/* clear share access for a given stateid */ +static inline void +clear_access(u32 access, struct nfs4_ol_stateid *stp) +{ + __clear_bit(access, &stp->st_access_bmap); +} + +/* test whether a given stateid has access */ +static inline bool +test_access(u32 access, struct nfs4_ol_stateid *stp) +{ + return test_bit(access, &stp->st_access_bmap); +} + +/* set share deny for a given stateid */ +static inline void +set_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + __set_bit(access, &stp->st_deny_bmap); +} + +/* clear share deny for a given stateid */ +static inline void +clear_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + __clear_bit(access, &stp->st_deny_bmap); +} + +/* test whether a given stateid is denying specific access */ +static inline bool +test_deny(u32 access, struct nfs4_ol_stateid *stp) +{ + return test_bit(access, &stp->st_deny_bmap); +} + +static int nfs4_access_to_omode(u32 access) +{ + switch (access & NFS4_SHARE_ACCESS_BOTH) { + case NFS4_SHARE_ACCESS_READ: + return O_RDONLY; + case NFS4_SHARE_ACCESS_WRITE: + return O_WRONLY; + case NFS4_SHARE_ACCESS_BOTH: + return O_RDWR; + } + WARN_ON_ONCE(1); + return O_RDONLY; +} + +/* release all access and file references for a given stateid */ +static void +release_all_access(struct nfs4_ol_stateid *stp) +{ + int i; + + for (i = 1; i < 4; i++) { + if (test_access(i, stp)) + nfs4_file_put_access(stp->st_file, + nfs4_access_to_omode(i)); + clear_access(i, stp); + } +} + +static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) +{ + list_del(&stp->st_perfile); + list_del(&stp->st_perstateowner); +} + +static void close_generic_stateid(struct nfs4_ol_stateid *stp) +{ + release_all_access(stp); + put_nfs4_file(stp->st_file); + stp->st_file = NULL; +} + +static void free_generic_stateid(struct nfs4_ol_stateid *stp) +{ + remove_stid(&stp->st_stid); + nfs4_free_stid(stateid_slab, &stp->st_stid); +} + +static void release_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct file *file; + + unhash_generic_stateid(stp); + unhash_stid(&stp->st_stid); + file = find_any_file(stp->st_file); + if (file) + locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); + close_generic_stateid(stp); + free_generic_stateid(stp); +} + +static void unhash_lockowner(struct nfs4_lockowner *lo) +{ + struct nfs4_ol_stateid *stp; + + list_del(&lo->lo_owner.so_strhash); + list_del(&lo->lo_perstateid); + list_del(&lo->lo_owner_ino_hash); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + release_lock_stateid(stp); + } +} + +static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +{ + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} + +static void release_lockowner(struct nfs4_lockowner *lo) +{ + unhash_lockowner(lo); + nfs4_free_lockowner(lo); +} + +static void +release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) +{ + struct nfs4_lockowner *lo; + + while (!list_empty(&open_stp->st_lockowners)) { + lo = list_entry(open_stp->st_lockowners.next, + struct nfs4_lockowner, lo_perstateid); + release_lockowner(lo); + } +} + +static void unhash_open_stateid(struct nfs4_ol_stateid *stp) +{ + unhash_generic_stateid(stp); + release_stateid_lockowners(stp); + close_generic_stateid(stp); +} + +static void release_open_stateid(struct nfs4_ol_stateid *stp) +{ + unhash_open_stateid(stp); + free_generic_stateid(stp); +} + +static void unhash_openowner(struct nfs4_openowner *oo) +{ + struct nfs4_ol_stateid *stp; + + list_del(&oo->oo_owner.so_strhash); + list_del(&oo->oo_perclient); + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + release_open_stateid(stp); + } +} + +static void release_last_closed_stateid(struct nfs4_openowner *oo) +{ + struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; + + if (s) { + free_generic_stateid(s); + oo->oo_last_closed_stid = NULL; + } +} + +static void nfs4_free_openowner(struct nfs4_openowner *oo) +{ + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); +} + +static void release_openowner(struct nfs4_openowner *oo) +{ + unhash_openowner(oo); + list_del(&oo->oo_close_lru); + release_last_closed_stateid(oo); + nfs4_free_openowner(oo); +} + +static inline int +hash_sessionid(struct nfs4_sessionid *sessionid) +{ + struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; + + return sid->sequence % SESSION_HASH_SIZE; +} + +#ifdef NFSD_DEBUG +static inline void +dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) +{ + u32 *ptr = (u32 *)(&sessionid->data[0]); + dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); +} +#else +static inline void +dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) +{ +} +#endif + +/* + * Bump the seqid on cstate->replay_owner, and clear replay_owner if it + * won't be used for replay. + */ +void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (nfserr == nfserr_replay_me) + return; + + if (!seqid_mutating_err(ntohl(nfserr))) { + cstate->replay_owner = NULL; + return; + } + if (!so) + return; + if (so->so_is_open_owner) + release_last_closed_stateid(openowner(so)); + so->so_seqid++; + return; +} + +static void +gen_sessionid(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_sessionid *sid; + + sid = (struct nfsd4_sessionid *)ses->se_sessionid.data; + sid->clientid = clp->cl_clientid; + sid->sequence = current_sessionid++; + sid->reserved = 0; +} + +/* + * The protocol defines ca_maxresponssize_cached to include the size of + * the rpc header, but all we need to cache is the data starting after + * the end of the initial SEQUENCE operation--the rest we regenerate + * each time. Therefore we can advertise a ca_maxresponssize_cached + * value that is the number of bytes in our cache plus a few additional + * bytes. In order to stay on the safe side, and not promise more than + * we can cache, those additional bytes must be the minimum possible: 24 + * bytes of rpc header (xid through accept state, with AUTH_NULL + * verifier), 12 for the compound header (with zero-length tag), and 44 + * for the SEQUENCE op response: + */ +#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) + +static void +free_session_slots(struct nfsd4_session *ses) +{ + int i; + + for (i = 0; i < ses->se_fchannel.maxreqs; i++) + kfree(ses->se_slots[i]); +} + +/* + * We don't actually need to cache the rpc and session headers, so we + * can allocate a little less for each slot: + */ +static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) +{ + u32 size; + + if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) + size = 0; + else + size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + return size + sizeof(struct nfsd4_slot); +} + +/* + * XXX: If we run out of reserved DRC memory we could (up to a point) + * re-negotiate active sessions and reduce their slot usage to make + * room for new connections. For now we just fail the create session. + */ +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) +{ + u32 slotsize = slot_bytes(ca); + u32 num = ca->maxreqs; + int avail; + + spin_lock(&nfsd_drc_lock); + avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem - nfsd_drc_mem_used); + num = min_t(int, num, avail / slotsize); + nfsd_drc_mem_used += num * slotsize; + spin_unlock(&nfsd_drc_lock); + + return num; +} + +static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) +{ + int slotsize = slot_bytes(ca); + + spin_lock(&nfsd_drc_lock); + nfsd_drc_mem_used -= slotsize * ca->maxreqs; + spin_unlock(&nfsd_drc_lock); +} + +static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, + struct nfsd4_channel_attrs *battrs) +{ + int numslots = fattrs->maxreqs; + int slotsize = slot_bytes(fattrs); + struct nfsd4_session *new; + int mem, i; + + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + mem = numslots * sizeof(struct nfsd4_slot *); + + new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); + if (!new) + return NULL; + /* allocate each struct nfsd4_slot and data cache in one piece */ + for (i = 0; i < numslots; i++) { + new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); + if (!new->se_slots[i]) + goto out_free; + } + + memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); + + return new; +out_free: + while (i--) + kfree(new->se_slots[i]); + kfree(new); + return NULL; +} + +static void free_conn(struct nfsd4_conn *c) +{ + svc_xprt_put(c->cn_xprt); + kfree(c); +} + +static void nfsd4_conn_lost(struct svc_xpt_user *u) +{ + struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); + struct nfs4_client *clp = c->cn_session->se_client; + + spin_lock(&clp->cl_lock); + if (!list_empty(&c->cn_persession)) { + list_del(&c->cn_persession); + free_conn(c); + } + nfsd4_probe_callback(clp); + spin_unlock(&clp->cl_lock); +} + +static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) +{ + struct nfsd4_conn *conn; + + conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); + if (!conn) + return NULL; + svc_xprt_get(rqstp->rq_xprt); + conn->cn_xprt = rqstp->rq_xprt; + conn->cn_flags = flags; + INIT_LIST_HEAD(&conn->cn_xpt_user.list); + return conn; +} + +static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + conn->cn_session = ses; + list_add(&conn->cn_persession, &ses->se_conns); +} + +static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + + spin_lock(&clp->cl_lock); + __nfsd4_hash_conn(conn, ses); + spin_unlock(&clp->cl_lock); +} + +static int nfsd4_register_conn(struct nfsd4_conn *conn) +{ + conn->cn_xpt_user.callback = nfsd4_conn_lost; + return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); +} + +static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses) +{ + int ret; + + nfsd4_hash_conn(conn, ses); + ret = nfsd4_register_conn(conn); + if (ret) + /* oops; xprt is already down: */ + nfsd4_conn_lost(&conn->cn_xpt_user); + if (conn->cn_flags & NFS4_CDFC4_BACK) { + /* callback channel may be back up */ + nfsd4_probe_callback(ses->se_client); + } +} + +static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) +{ + u32 dir = NFS4_CDFC4_FORE; + + if (cses->flags & SESSION4_BACK_CHAN) + dir |= NFS4_CDFC4_BACK; + return alloc_conn(rqstp, dir); +} + +/* must be called under client_lock */ +static void nfsd4_del_conns(struct nfsd4_session *s) +{ + struct nfs4_client *clp = s->se_client; + struct nfsd4_conn *c; + + spin_lock(&clp->cl_lock); + while (!list_empty(&s->se_conns)) { + c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); + list_del_init(&c->cn_persession); + spin_unlock(&clp->cl_lock); + + unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); + free_conn(c); + + spin_lock(&clp->cl_lock); + } + spin_unlock(&clp->cl_lock); +} + +static void __free_session(struct nfsd4_session *ses) +{ + free_session_slots(ses); + kfree(ses); +} + +static void free_session(struct nfsd4_session *ses) +{ + struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + nfsd4_del_conns(ses); + nfsd4_put_drc_mem(&ses->se_fchannel); + __free_session(ses); +} + +static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) +{ + int idx; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + new->se_client = clp; + gen_sessionid(new); + + INIT_LIST_HEAD(&new->se_conns); + + new->se_cb_seq_nr = 1; + new->se_flags = cses->flags; + new->se_cb_prog = cses->callback_prog; + new->se_cb_sec = cses->cb_sec; + atomic_set(&new->se_ref, 0); + idx = hash_sessionid(&new->se_sessionid); + spin_lock(&nn->client_lock); + list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); + spin_lock(&clp->cl_lock); + list_add(&new->se_perclnt, &clp->cl_sessions); + spin_unlock(&clp->cl_lock); + spin_unlock(&nn->client_lock); + + if (cses->flags & SESSION4_BACK_CHAN) { + struct sockaddr *sa = svc_addr(rqstp); + /* + * This is a little silly; with sessions there's no real + * use for the callback address. Use the peer address + * as a reasonable default for now, but consider fixing + * the rpc client not to require an address in the + * future: + */ + rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); + clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); + } +} + +/* caller must hold client_lock */ +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) +{ + struct nfsd4_session *elem; + int idx; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + dump_sessionid(__func__, sessionid); + idx = hash_sessionid(sessionid); + /* Search in the appropriate list */ + list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) { + if (!memcmp(elem->se_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN)) { + return elem; + } + } + + dprintk("%s: session not found\n", __func__); + return NULL; +} + +/* caller must hold client_lock */ +static void +unhash_session(struct nfsd4_session *ses) +{ + list_del(&ses->se_hash); + spin_lock(&ses->se_client->cl_lock); + list_del(&ses->se_perclnt); + spin_unlock(&ses->se_client->cl_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ static int -STALE_CLIENTID(clientid_t *clid) +STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) { - if (clid->cl_boot == boot_time) + if (clid->cl_boot == nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x)\n", - clid->cl_boot, clid->cl_id); + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -346,99 +1173,109 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); + clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); if (clp->cl_name.data == NULL) { kfree(clp); return NULL; } - memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } static void -shutdown_callback_client(struct nfs4_client *clp) +free_client(struct nfs4_client *clp) { - struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); - if (clnt) { - /* - * Callback threads take a reference on the client, so there - * should be no outstanding callbacks at this point. - */ - clp->cl_callback.cb_client = NULL; - rpc_shutdown_client(clnt); + lockdep_assert_held(&nn->client_lock); + while (!list_empty(&clp->cl_sessions)) { + struct nfsd4_session *ses; + ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, + se_perclnt); + list_del(&ses->se_perclnt); + WARN_ON_ONCE(atomic_read(&ses->se_ref)); + free_session(ses); } -} - -static inline void -free_client(struct nfs4_client *clp) -{ - shutdown_callback_client(clp); - if (clp->cl_cred.cr_group_info) - put_group_info(clp->cl_cred.cr_group_info); + rpc_destroy_wait_queue(&clp->cl_cb_waitq); + free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } -void -put_nfs4_client(struct nfs4_client *clp) +/* must be called under the client_lock */ +static inline void +unhash_client_locked(struct nfs4_client *clp) { - if (atomic_dec_and_test(&clp->cl_count)) - free_client(clp); + struct nfsd4_session *ses; + + list_del(&clp->cl_lru); + spin_lock(&clp->cl_lock); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + list_del_init(&ses->se_hash); + spin_unlock(&clp->cl_lock); } static void -expire_client(struct nfs4_client *clp) +destroy_client(struct nfs4_client *clp) { - struct nfs4_stateowner *sop; + struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; - - dprintk("NFSD: expire_client cl_count %d\n", - atomic_read(&clp->cl_count)); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); + spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - dprintk("NFSD: expire client. dp %p, fp %p\n", dp, - dp->dl_flock); list_del_init(&dp->dl_perclnt); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); + } + list_splice_init(&clp->cl_revoked, &reaplist); + while (!list_empty(&reaplist)) { + dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + destroy_revoked_delegation(dp); } - list_del(&clp->cl_idhash); - list_del(&clp->cl_strhash); - list_del(&clp->cl_lru); while (!list_empty(&clp->cl_openowners)) { - sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); - release_stateowner(sop); + oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); + release_openowner(oo); } - put_nfs4_client(clp); + nfsd4_shutdown_callback(clp); + if (clp->cl_cb_conn.cb_xprt) + svc_xprt_put(clp->cl_cb_conn.cb_xprt); + list_del(&clp->cl_idhash); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + spin_lock(&nn->client_lock); + unhash_client_locked(clp); + WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); + free_client(clp); + spin_unlock(&nn->client_lock); } -static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) +static void expire_client(struct nfs4_client *clp) { - struct nfs4_client *clp; - - clp = alloc_client(name); - if (clp == NULL) - return NULL; - memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_count, 1); - atomic_set(&clp->cl_callback.cb_set, 0); - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - return clp; + nfsd4_client_record_remove(clp); + destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -453,12 +1290,35 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) target->cl_clientid.cl_id = source->cl_clientid.cl_id; } -static void copy_cred(struct svc_cred *target, struct svc_cred *source) +static int copy_cred(struct svc_cred *target, struct svc_cred *source) { + if (source->cr_principal) { + target->cr_principal = + kstrdup(source->cr_principal, GFP_KERNEL); + if (target->cr_principal == NULL) + return -ENOMEM; + } else + target->cr_principal = NULL; + target->cr_flavor = source->cr_flavor; target->cr_uid = source->cr_uid; target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); + return 0; +} + +static long long +compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) +{ + long long res; + + res = o1->len - o2->len; + if (res) + return res; + return (long long)memcmp(o1->data, o2->data, o1->len); } static int same_name(const char *n1, const char *n2) @@ -478,315 +1338,1198 @@ same_clid(clientid_t *cl1, clientid_t *cl2) return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); } -/* XXX what about NGROUP */ -static int +static bool groups_equal(struct group_info *g1, struct group_info *g2) +{ + int i; + + if (g1->ngroups != g2->ngroups) + return false; + for (i=0; i<g1->ngroups; i++) + if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i))) + return false; + return true; +} + +/* + * RFC 3530 language requires clid_inuse be returned when the + * "principal" associated with a requests differs from that previously + * used. We use uid, gid's, and gss principal string as our best + * approximation. We also don't want to allow non-gss use of a client + * established using gss: in theory cr_principal should catch that + * change, but in practice cr_principal can be null even in the gss case + * since gssd doesn't always pass down a principal string. + */ +static bool is_gss_cred(struct svc_cred *cr) +{ + /* Is cr_flavor one of the gss "pseudoflavors"?: */ + return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR); +} + + +static bool same_creds(struct svc_cred *cr1, struct svc_cred *cr2) { - return cr1->cr_uid == cr2->cr_uid; + if ((is_gss_cred(cr1) != is_gss_cred(cr2)) + || (!uid_eq(cr1->cr_uid, cr2->cr_uid)) + || (!gid_eq(cr1->cr_gid, cr2->cr_gid)) + || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) + return false; + if (cr1->cr_principal == cr2->cr_principal) + return true; + if (!cr1->cr_principal || !cr2->cr_principal) + return false; + return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); +} + +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + if (!cr->cr_gss_mech) + return false; + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); } -static void gen_clid(struct nfs4_client *clp) +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; - clp->cl_clientid.cl_boot = boot_time; + clp->cl_clientid.cl_boot = nn->boot_time; clp->cl_clientid.cl_id = current_clientid++; } static void gen_confirm(struct nfs4_client *clp) { + __be32 verf[2]; static u32 i; - u32 *p; - p = (u32 *)clp->cl_confirm.data; - *p++ = get_seconds(); - *p++ = i++; + verf[0] = (__be32)get_seconds(); + verf[1] = (__be32)i++; + memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } -static int check_name(struct xdr_netobj name) +static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - if (name.len == 0) - return 0; - if (name.len > NFS4_OPAQUE_LIMIT) { - dprintk("NFSD: check_name: name too long(%d)!\n", name.len); - return 0; + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; +} + +static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +{ + struct nfs4_stid *s; + + s = find_stateid(cl, t); + if (!s) + return NULL; + if (typemask & s->sc_type) + return s; + return NULL; +} + +static struct nfs4_client *create_client(struct xdr_netobj name, + struct svc_rqst *rqstp, nfs4_verifier *verf) +{ + struct nfs4_client *clp; + struct sockaddr *sa = svc_addr(rqstp); + int ret; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + clp = alloc_client(name); + if (clp == NULL) + return NULL; + + ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); + if (ret) { + spin_lock(&nn->client_lock); + free_client(clp); + spin_unlock(&nn->client_lock); + return NULL; } - return 1; + nfsd4_init_callback(&clp->cl_cb_null); + clp->cl_time = get_seconds(); + clear_bit(0, &clp->cl_cb_slot_busy); + copy_verf(clp, verf); + rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + gen_confirm(clp); + clp->cl_cb_session = NULL; + clp->net = net; + return clp; +} + +static void +add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct nfs4_client *clp; + + while (*new) { + clp = rb_entry(*new, struct nfs4_client, cl_namenode); + parent = *new; + + if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&new_clp->cl_namenode, parent, new); + rb_insert_color(&new_clp->cl_namenode, root); +} + +static struct nfs4_client * +find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) +{ + long long cmp; + struct rb_node *node = root->rb_node; + struct nfs4_client *clp; + + while (node) { + clp = rb_entry(node, struct nfs4_client, cl_namenode); + cmp = compare_blob(&clp->cl_name, name); + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + return clp; + } + return NULL; } static void -add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +add_to_unconfirmed(struct nfs4_client *clp) { unsigned int idhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); - list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); - list_add_tail(&clp->cl_lru, &client_lru); - clp->cl_time = get_seconds(); + list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); + renew_client(clp); } static void move_to_confirmed(struct nfs4_client *clp) { unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); - unsigned int strhashval; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); - list_del_init(&clp->cl_strhash); - list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); - strhashval = clientstr_hashval(clp->cl_recdir); - list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + add_clp_to_name_tree(clp, &nn->conf_name_tree); + set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); renew_client(clp); } static struct nfs4_client * -find_confirmed_client(clientid_t *clid) +find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) { struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) + list_for_each_entry(clp, &tbl[idhashval], cl_idhash) { + if (same_clid(&clp->cl_clientid, clid)) { + if ((bool)clp->cl_minorversion != sessions) + return NULL; + renew_client(clp); return clp; + } } return NULL; } static struct nfs4_client * -find_unconfirmed_client(clientid_t *clid) +find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - struct nfs4_client *clp; - unsigned int idhashval = clientid_hashval(clid->cl_id); + struct list_head *tbl = nn->conf_id_hashtbl; - list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) - return clp; - } - return NULL; + return find_client_in_id_table(tbl, clid, sessions); } static struct nfs4_client * -find_confirmed_client_by_str(const char *dname, unsigned int hashval) +find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - struct nfs4_client *clp; + struct list_head *tbl = nn->unconf_id_hashtbl; - list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; - } - return NULL; + return find_client_in_id_table(tbl, clid, sessions); } +static bool clp_used_exchangeid(struct nfs4_client *clp) +{ + return clp->cl_exchange_flags != 0; +} + static struct nfs4_client * -find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) +find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { - struct nfs4_client *clp; + return find_clp_in_name_tree(name, &nn->conf_name_tree); +} - list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { - if (same_name(clp->cl_recdir, dname)) - return clp; +static struct nfs4_client * +find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) +{ + return find_clp_in_name_tree(name, &nn->unconf_name_tree); +} + +static void +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) +{ + struct nfs4_cb_conn *conn = &clp->cl_cb_conn; + struct sockaddr *sa = svc_addr(rqstp); + u32 scopeid = rpc_get_scope_id(sa); + unsigned short expected_family; + + /* Currently, we only support tcp and tcp6 for the callback channel */ + if (se->se_callback_netid_len == 3 && + !memcmp(se->se_callback_netid_val, "tcp", 3)) + expected_family = AF_INET; + else if (se->se_callback_netid_len == 4 && + !memcmp(se->se_callback_netid_val, "tcp6", 4)) + expected_family = AF_INET6; + else + goto out_err; + + conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *)&conn->cb_addr, + sizeof(conn->cb_addr)); + + if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family) + goto out_err; + + if (conn->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; + + conn->cb_prog = se->se_callback_prog; + conn->cb_ident = se->se_callback_ident; + memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); + return; +out_err: + conn->cb_addr.ss_family = AF_UNSPEC; + conn->cb_addrlen = 0; + dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + "will not receive delegations\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + + return; +} + +/* + * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. + */ +void +nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) +{ + struct xdr_buf *buf = resp->xdr.buf; + struct nfsd4_slot *slot = resp->cstate.slot; + unsigned int base; + + dprintk("--> %s slot %p\n", __func__, slot); + + slot->sl_opcnt = resp->opcnt; + slot->sl_status = resp->cstate.status; + + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; + if (nfsd4_not_cached(resp)) { + slot->sl_datalen = 0; + return; } - return NULL; + base = resp->cstate.data_offset; + slot->sl_datalen = buf->len - base; + if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) + WARN("%s: sessions DRC could not cache compound\n", __func__); + return; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) +/* + * Encode the replay sequence operation from the slot values. + * If cachethis is FALSE encode the uncached rep error on the next + * operation which sets resp->p and increments resp->opcnt for + * nfs4svc_encode_compoundres. + * + */ +static __be32 +nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) { - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; + struct nfsd4_op *op; + struct nfsd4_slot *slot = resp->cstate.slot; - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n * 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } + /* Encode the replayed sequence operation */ + op = &args->ops[resp->opcnt - 1]; + nfsd4_encode_operation(resp, op); + + /* Return nfserr_retry_uncached_rep in next operation. */ + if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { + op = &args->ops[resp->opcnt++]; + op->status = nfserr_retry_uncached_rep; + nfsd4_encode_operation(resp, op); } - *lenp = len; - *addrp = p; - return n; + return op->status; } -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; +/* + * The sequence operation is not cached because we can use the slot and + * session values. + */ +static __be32 +nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, + struct nfsd4_sequence *seq) +{ + struct nfsd4_slot *slot = resp->cstate.slot; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + __be32 status; + + dprintk("--> %s slot %p\n", __func__, slot); + + status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); + if (status) + return status; + + p = xdr_reserve_space(xdr, slot->sl_datalen); + if (!p) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen); + xdr_commit_encode(xdr); + + resp->opcnt = slot->sl_opcnt; + return slot->sl_status; +} + +/* + * Set the exchange_id flags returned by the server. + */ +static void +nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) +{ + /* pNFS is not supported */ + new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; + + /* Referrals are supported, Migration is not. */ + new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; + + /* set the wire flags to return to client. */ + clid->flags = new->cl_exchange_flags; +} + +static bool client_has_state(struct nfs4_client *clp) +{ + /* + * Note clp->cl_openowners check isn't quite right: there's no + * need to count owners without stateid's. + * + * Also note we should probably be using this in 4.0 case too. + */ + return !list_empty(&clp->cl_openowners) + || !list_empty(&clp->cl_delegations) + || !list_empty(&clp->cl_sessions); +} + +__be32 +nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_exchange_id *exid) +{ + struct nfs4_client *unconf, *conf, *new; + __be32 status; + char addr_str[INET6_ADDRSTRLEN]; + nfs4_verifier verf = exid->verifier; + struct sockaddr *sa = svc_addr(rqstp); + bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + rpc_ntop(sa, addr_str, sizeof(addr_str)); + dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " + "ip_addr=%s flags %x, spa_how %d\n", + __func__, rqstp, exid, exid->clname.len, exid->clname.data, + addr_str, exid->flags, exid->spa_how); + + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) + return nfserr_inval; + + switch (exid->spa_how) { + case SP4_MACH_CRED: + if (!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; + case SP4_NONE: + break; + default: /* checked by xdr code */ + WARN_ON_ONCE(1); + case SP4_SSV: + return nfserr_encr_alg_unsupp; } - *cbaddrp = cbaddr; - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; + /* Cases below refer to rfc 5661 section 18.35.4: */ + nfs4_lock_state(); + conf = find_confirmed_client_by_name(&exid->clname, nn); + if (conf) { + bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); + bool verfs_match = same_verf(&verf, &conf->cl_verifier); + + if (update) { + if (!clp_used_exchangeid(conf)) { /* buggy client */ + status = nfserr_inval; + goto out; + } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } + if (!creds_match) { /* case 9 */ + status = nfserr_perm; + goto out; + } + if (!verfs_match) { /* case 8 */ + status = nfserr_not_same; + goto out; + } + /* case 6 */ + exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + new = conf; + goto out_copy; } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; + if (!creds_match) { /* case 3 */ + if (client_has_state(conf)) { + status = nfserr_clid_inuse; + goto out; + } + expire_client(conf); + goto out_new; + } + if (verfs_match) { /* case 2 */ + conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + new = conf; + goto out_copy; + } + /* case 5, client reboot */ + goto out_new; } - *cbportp = cbport; - return 1; + + if (update) { /* case 7 */ + status = nfserr_noent; + goto out; + } + + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); + if (unconf) /* case 4, possible retry or client restart */ + expire_client(unconf); + + /* case 1 (normal case) */ +out_new: + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) { + status = nfserr_jukebox; + goto out; + } + new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); + + gen_clid(new, nn); + add_to_unconfirmed(new); +out_copy: + exid->clientid.cl_boot = new->cl_clientid.cl_boot; + exid->clientid.cl_id = new->cl_clientid.cl_id; + + exid->seqid = new->cl_cs_slot.sl_seqid + 1; + nfsd4_set_ex_flags(new, exid); + + dprintk("nfsd4_exchange_id seqid %d flags %x\n", + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); + status = nfs_ok; + +out: + nfs4_unlock_state(); + return status; +} + +static __be32 +check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) +{ + dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, + slot_seqid); + + /* The slot is in use, and no response has been sent. */ + if (slot_inuse) { + if (seqid == slot_seqid) + return nfserr_jukebox; + else + return nfserr_seq_misordered; + } + /* Note unsigned 32-bit arithmetic handles wraparound: */ + if (likely(seqid == slot_seqid + 1)) + return nfs_ok; + if (seqid == slot_seqid) + return nfserr_replay_cache; + return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, __be32 nfserr) { - struct nfs4_callback *cb = &clp->cl_callback; + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} - /* Currently, we only support tcp for the callback channel */ - if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) - goto out_err; +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + +#define NFSD_MIN_REQ_HDR_SEQ_SZ ((\ + 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* version, opcount, opcode */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, cache */ \ + 4 ) * sizeof(__be32)) + +#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\ + 2 + /* verifier: AUTH_NULL, length 0 */\ + 1 + /* status */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* opcount, opcode, opstatus*/ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, slotID, status */ \ + 5 ) * sizeof(__be32)) + +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +{ + u32 maxrpc = nn->nfsd_serv->sv_max_mesg; + + if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ) + return nfserr_toosmall; + ca->headerpadsz = 0; + ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); + ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); + ca->maxresp_cached = min_t(u32, ca->maxresp_cached, + NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); + ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); + /* + * Note decreasing slot size below client's request may make it + * difficult for client to function correctly, whereas + * decreasing the number of slots will (just?) affect + * performance. When short on memory we therefore prefer to + * decrease number of slots instead of their size. Clients that + * request larger slots than they need will get poor results: + */ + ca->maxreqs = nfsd4_get_drc_mem(ca); + if (!ca->maxreqs) + return nfserr_jukebox; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) - goto out_err; - cb->cb_prog = se->se_callback_prog; - cb->cb_ident = se->se_callback_ident; - return; -out_err: - dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " - "will not receive delegations\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + return nfs_ok; +} - return; +#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \ + RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32)) +#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \ + RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32)) + +static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) +{ + ca->headerpadsz = 0; + + /* + * These RPC_MAX_HEADER macros are overkill, especially since we + * don't even do gss on the backchannel yet. But this is still + * less than 1k. Tighten up this estimate in the unlikely event + * it turns out to be a problem for some client: + */ + if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ) + return nfserr_toosmall; + if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ) + return nfserr_toosmall; + ca->maxresp_cached = 0; + if (ca->maxops < 2) + return nfserr_toosmall; + + return nfs_ok; +} + +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) +{ + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. + * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } } __be32 -nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_setclientid *setclid) +nfsd4_create_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_create_session *cr_ses) { - struct sockaddr_in *sin = svc_addr_in(rqstp); - struct xdr_netobj clname = { - .len = setclid->se_namelen, - .data = setclid->se_name, - }; - nfs4_verifier clverifier = setclid->se_verf; - unsigned int strhashval; - struct nfs4_client *conf, *unconf, *new; - __be32 status; - char dname[HEXDIR_LEN]; - - if (!check_name(clname)) - return nfserr_inval; + struct sockaddr *sa = svc_addr(rqstp); + struct nfs4_client *conf, *unconf; + struct nfsd4_session *new; + struct nfsd4_conn *conn; + struct nfsd4_clid_slot *cs_slot = NULL; + __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - status = nfs4_make_rec_clidname(dname, &clname); + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; + status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; + status = check_backchannel_attrs(&cr_ses->back_channel); + if (status) + goto out_release_drc_mem; + status = nfserr_jukebox; + new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); + if (!new) + goto out_release_drc_mem; + conn = alloc_conn_from_crses(rqstp, cr_ses); + if (!conn) + goto out_free_session; - /* - * XXX The Duplicate Request Cache (DRC) has been checked (??) - * We get here on a DRC miss. + nfs4_lock_state(); + unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); + conf = find_confirmed_client(&cr_ses->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); + + if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); + if (status == nfserr_replay_cache) { + status = nfsd4_replay_create_session(cr_ses, cs_slot); + goto out_free_conn; + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { + status = nfserr_seq_misordered; + goto out_free_conn; + } + } else if (unconf) { + struct nfs4_client *old; + if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { + status = nfserr_clid_inuse; + goto out_free_conn; + } + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); + if (status) { + /* an unconfirmed replay returns misordered */ + status = nfserr_seq_misordered; + goto out_free_conn; + } + old = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (old) { + status = mark_client_expired(old); + if (status) + goto out_free_conn; + expire_client(old); + } + move_to_confirmed(unconf); + conf = unconf; + } else { + status = nfserr_stale_clientid; + goto out_free_conn; + } + status = nfs_ok; + /* + * We do not support RDMA or persistent sessions */ + cr_ses->flags &= ~SESSION4_PERSIST; + cr_ses->flags &= ~SESSION4_RDMA; + + init_session(rqstp, new, conf, cr_ses); + nfsd4_init_conn(rqstp, conn, new); + + memcpy(cr_ses->sessionid.data, new->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); + nfs4_unlock_state(); + return status; +out_free_conn: + nfs4_unlock_state(); + free_conn(conn); +out_free_session: + __free_session(new); +out_release_drc_mem: + nfsd4_put_drc_mem(&cr_ses->fore_channel); + return status; +} + +static __be32 nfsd4_map_bcts_dir(u32 *dir) +{ + switch (*dir) { + case NFS4_CDFC4_FORE: + case NFS4_CDFC4_BACK: + return nfs_ok; + case NFS4_CDFC4_FORE_OR_BOTH: + case NFS4_CDFC4_BACK_OR_BOTH: + *dir = NFS4_CDFC4_BOTH; + return nfs_ok; + }; + return nfserr_inval; +} + +__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc) +{ + struct nfsd4_session *session = cstate->session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; + spin_lock(&nn->client_lock); + session->se_cb_prog = bc->bc_cb_program; + session->se_cb_sec = bc->bc_cb_sec; + spin_unlock(&nn->client_lock); - strhashval = clientstr_hashval(dname); + nfsd4_probe_callback(session->se_client); + return nfs_ok; +} + +__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_bind_conn_to_session *bcts) +{ + __be32 status; + struct nfsd4_conn *conn; + struct nfsd4_session *session; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + if (!nfsd4_last_compound_op(rqstp)) + return nfserr_not_only_op; nfs4_lock_state(); - conf = find_confirmed_client_by_str(dname, strhashval); - if (conf) { - /* RFC 3530 14.2.33 CASE 0: */ - status = nfserr_clid_inuse; - if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) - || conf->cl_addr != sin->sin_addr.s_addr) { - dprintk("NFSD: setclientid: string in use by client" - "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr)); + spin_lock(&nn->client_lock); + session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); + spin_unlock(&nn->client_lock); + status = nfserr_badsession; + if (!session) + goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; + status = nfsd4_map_bcts_dir(&bcts->dir); + if (status) + goto out; + conn = alloc_conn(rqstp, bcts->dir); + status = nfserr_jukebox; + if (!conn) + goto out; + nfsd4_init_conn(rqstp, conn, session); + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; +} + +static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) +{ + if (!session) + return 0; + return !memcmp(sid, &session->se_sessionid, sizeof(*sid)); +} + +__be32 +nfsd4_destroy_session(struct svc_rqst *r, + struct nfsd4_compound_state *cstate, + struct nfsd4_destroy_session *sessionid) +{ + struct nfsd4_session *ses; + __be32 status; + int ref_held_by_me = 0; + struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); + + nfs4_lock_state(); + status = nfserr_not_only_op; + if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { + if (!nfsd4_last_compound_op(r)) goto out; + ref_held_by_me++; + } + dump_sessionid(__func__, &sessionid->sessionid); + spin_lock(&nn->client_lock); + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); + status = nfserr_badsession; + if (!ses) + goto out_client_lock; + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) + goto out_client_lock; + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); + if (status) + goto out_put_session; + unhash_session(ses); + spin_unlock(&nn->client_lock); + + nfsd4_probe_callback_sync(ses->se_client); + + spin_lock(&nn->client_lock); + status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); +out_client_lock: + spin_unlock(&nn->client_lock); +out: + nfs4_unlock_state(); + return status; +} + +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; } } + return NULL; +} + +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd4_conn *c; + __be32 status = nfs_ok; + int ret; + + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(new->cn_xprt, ses); + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; + __nfsd4_hash_conn(new, ses); + spin_unlock(&clp->cl_lock); + ret = nfsd4_register_conn(new); + if (ret) + /* oops; xprt is already down: */ + nfsd4_conn_lost(&new->cn_xpt_user); + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; +} + +static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) +{ + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + return args->opcnt > session->se_fchannel.maxops; +} + +static bool nfsd4_request_too_big(struct svc_rqst *rqstp, + struct nfsd4_session *session) +{ + struct xdr_buf *xb = &rqstp->rq_arg; + + return xb->len > session->se_fchannel.maxreq_sz; +} + +__be32 +nfsd4_sequence(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_sequence *seq) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct xdr_stream *xdr = &resp->xdr; + struct nfsd4_session *session; + struct nfs4_client *clp; + struct nfsd4_slot *slot; + struct nfsd4_conn *conn; + __be32 status; + int buflen; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + if (resp->opcnt != 1) + return nfserr_sequence_pos; + /* - * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") - * has a description of SETCLIENTID request processing consisting - * of 5 bullet points, labeled as CASE0 - CASE4 below. + * Will be either used or freed by nfsd4_sequence_check_conn + * below. */ - unconf = find_unconfirmed_client_by_str(dname, strhashval); - status = nfserr_resource; - if (!conf) { - /* - * RFC 3530 14.2.33 CASE 4: - * placed first, because it is the normal case - */ - if (unconf) - expire_client(unconf); - new = create_client(clname, dname); - if (new == NULL) + conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); + if (!conn) + return nfserr_jukebox; + + spin_lock(&nn->client_lock); + status = nfserr_badsession; + session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); + if (!session) + goto out_no_session; + clp = session->se_client; + status = get_client_locked(clp); + if (status) + goto out_no_session; + status = nfsd4_get_session_locked(session); + if (status) + goto out_put_client; + + status = nfserr_too_many_ops; + if (nfsd4_session_too_many_ops(rqstp, session)) + goto out_put_session; + + status = nfserr_req_too_big; + if (nfsd4_request_too_big(rqstp, session)) + goto out_put_session; + + status = nfserr_badslot; + if (seq->slotid >= session->se_fchannel.maxreqs) + goto out_put_session; + + slot = session->se_slots[seq->slotid]; + dprintk("%s: slotid %d\n", __func__, seq->slotid); + + /* We do not negotiate the number of slots yet, so set the + * maxslots to the session maxreqs which is used to encode + * sr_highest_slotid and the sr_target_slot id to maxslots */ + seq->maxslots = session->se_fchannel.maxreqs; + + status = check_slot_seqid(seq->seqid, slot->sl_seqid, + slot->sl_flags & NFSD4_SLOT_INUSE); + if (status == nfserr_replay_cache) { + status = nfserr_seq_misordered; + if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) + goto out_put_session; + cstate->slot = slot; + cstate->session = session; + /* Return the cached reply status and set cstate->status + * for nfsd4_proc_compound processing */ + status = nfsd4_replay_cache_entry(resp, seq); + cstate->status = nfserr_replay_cache; + goto out; + } + if (status) + goto out_put_session; + + status = nfsd4_sequence_check_conn(conn, session); + conn = NULL; + if (status) + goto out_put_session; + + buflen = (seq->cachethis) ? + session->se_fchannel.maxresp_cached : + session->se_fchannel.maxresp_sz; + status = (seq->cachethis) ? nfserr_rep_too_big_to_cache : + nfserr_rep_too_big; + if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) + goto out_put_session; + svc_reserve(rqstp, buflen); + + status = nfs_ok; + /* Success! bump slot seqid */ + slot->sl_seqid = seq->seqid; + slot->sl_flags |= NFSD4_SLOT_INUSE; + if (seq->cachethis) + slot->sl_flags |= NFSD4_SLOT_CACHETHIS; + else + slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS; + + cstate->slot = slot; + cstate->session = session; + +out: + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; + } + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; +out_no_session: + if (conn) + free_conn(conn); + spin_unlock(&nn->client_lock); + return status; +out_put_session: + nfsd4_put_session(session); +out_put_client: + put_client_renew_locked(clp); + goto out_no_session; +} + +__be32 +nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) +{ + struct nfs4_client *conf, *unconf, *clp; + __be32 status = 0; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + nfs4_lock_state(); + unconf = find_unconfirmed_client(&dc->clientid, true, nn); + conf = find_confirmed_client(&dc->clientid, true, nn); + WARN_ON_ONCE(conf && unconf); + + if (conf) { + clp = conf; + + if (client_has_state(conf)) { + status = nfserr_clientid_busy; goto out; - gen_clid(new); - } else if (same_verf(&conf->cl_verifier, &clverifier)) { - /* - * RFC 3530 14.2.33 CASE 1: - * probable callback update - */ - if (unconf) { - /* Note this is removing unconfirmed {*x***}, - * which is stronger than RFC recommended {vxc**}. - * This has the advantage that there is at most - * one {*x***} in either list at any time. - */ - expire_client(unconf); } - new = create_client(clname, dname); - if (new == NULL) - goto out; - copy_clid(new, conf); - } else if (!unconf) { + } else if (unconf) + clp = unconf; + else { + status = nfserr_stale_clientid; + goto out; + } + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } + expire_client(clp); +out: + nfs4_unlock_state(); + return status; +} + +__be32 +nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) +{ + __be32 status = 0; + + if (rc->rca_one_fs) { + if (!cstate->current_fh.fh_dentry) + return nfserr_nofilehandle; /* - * RFC 3530 14.2.33 CASE 2: - * probable client reboot; state will be removed if - * confirmed. + * We don't take advantage of the rca_one_fs case. + * That's OK, it's optional, we can safely ignore it. */ - new = create_client(clname, dname); - if (new == NULL) - goto out; - gen_clid(new); - } else { + return nfs_ok; + } + + nfs4_lock_state(); + status = nfserr_complete_already; + if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags)) + goto out; + + status = nfserr_stale_clientid; + if (is_client_expired(cstate->session->se_client)) /* - * RFC 3530 14.2.33 CASE 3: - * probable client reboot; state will be removed if - * confirmed. + * The following error isn't really legal. + * But we only get here if the client just explicitly + * destroyed the client. Surely it no longer cares what + * error it gets back on an operation for the dead + * client. */ - expire_client(unconf); - new = create_client(clname, dname); - if (new == NULL) + goto out; + + status = nfs_ok; + nfsd4_client_record_create(cstate->session->se_client); +out: + nfs4_unlock_state(); + return status; +} + +__be32 +nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_setclientid *setclid) +{ + struct xdr_netobj clname = setclid->se_name; + nfs4_verifier clverifier = setclid->se_verf; + struct nfs4_client *conf, *unconf, *new; + __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + /* Cases below refer to rfc 3530 section 14.2.33: */ + nfs4_lock_state(); + conf = find_confirmed_client_by_name(&clname, nn); + if (conf) { + /* case 0: */ + status = nfserr_clid_inuse; + if (clp_used_exchangeid(conf)) + goto out; + if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; - gen_clid(new); - } - copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; - copy_cred(&new->cl_cred, &rqstp->rq_cred); - gen_confirm(new); - gen_callback(new, setclid); - add_to_unconfirmed(new, strhashval); + } + } + unconf = find_unconfirmed_client_by_name(&clname, nn); + if (unconf) + expire_client(unconf); + status = nfserr_jukebox; + new = create_client(clname, rqstp, &clverifier); + if (new == NULL) + goto out; + if (conf && same_verf(&conf->cl_verifier, &clverifier)) + /* case 1: probable callback update */ + copy_clid(new, conf); + else /* case 4 (new client) or cases 2, 3 (client reboot): */ + gen_clid(new, nn); + new->cl_minorversion = 0; + gen_callback(new, setclid, rqstp); + add_to_unconfirmed(new); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); @@ -797,327 +2540,213 @@ out: } -/* - * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has - * a description of SETCLIENTID_CONFIRM request processing consisting of 4 - * bullets, labeled as CASE1 - CASE4 below. - */ __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if (STALE_CLIENTID(clid)) + if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - /* - * XXX The Duplicate Request Cache (DRC) has been checked (??) - * We get here on a DRC miss. - */ - nfs4_lock_state(); - conf = find_confirmed_client(clid); - unconf = find_unconfirmed_client(clid); - - status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) - goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) - goto out; - + conf = find_confirmed_client(clid, false, nn); + unconf = find_unconfirmed_client(clid, false, nn); /* - * section 14.2.34 of RFC 3530 has a description of - * SETCLIENTID_CONFIRM request processing consisting - * of 4 bullet points, labeled as CASE1 - CASE4 below. + * We try hard to give out unique clientid's, so if we get an + * attempt to confirm the same clientid with a different cred, + * there's a bug somewhere. Let's charitably assume it's our + * bug. */ - if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { - /* - * RFC 3530 14.2.34 CASE 1: - * callback update - */ - if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) - status = nfserr_clid_inuse; - else { - /* XXX: We just turn off callbacks until we can handle - * change request correctly. */ - atomic_set(&conf->cl_callback.cb_set, 0); - gen_confirm(conf); - nfsd4_remove_clid_dir(unconf); - expire_client(unconf); - status = nfs_ok; - - } - } else if (conf && !unconf) { - /* - * RFC 3530 14.2.34 CASE 2: - * probable retransmitted request; play it safe and - * do nothing. - */ - if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) - status = nfserr_clid_inuse; - else - status = nfs_ok; - } else if (!conf && unconf - && same_verf(&unconf->cl_confirm, &confirm)) { - /* - * RFC 3530 14.2.34 CASE 3: - * Normal case; new or rebooted client: - */ - if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { - status = nfserr_clid_inuse; - } else { - unsigned int hash = - clientstr_hashval(unconf->cl_recdir); - conf = find_confirmed_client_by_str(unconf->cl_recdir, - hash); - if (conf) { - nfsd4_remove_clid_dir(conf); - expire_client(conf); - } - move_to_confirmed(unconf); - conf = unconf; - nfsd4_probe_callback(conf); + status = nfserr_serverfault; + if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) + goto out; + if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) + goto out; + /* cases below refer to rfc 3530 section 14.2.34: */ + if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { + if (conf && !unconf) /* case 2: probable retransmit */ status = nfs_ok; + else /* case 4: client hasn't noticed we rebooted yet? */ + status = nfserr_stale_clientid; + goto out; + } + status = nfs_ok; + if (conf) { /* case 1: callback update */ + nfsd4_change_callback(conf, &unconf->cl_cb_conn); + nfsd4_probe_callback(conf); + expire_client(unconf); + } else { /* case 3: normal case; new or rebooted client */ + conf = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (conf) { + status = mark_client_expired(conf); + if (status) + goto out; + expire_client(conf); } - } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) - && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, - &confirm)))) { - /* - * RFC 3530 14.2.34 CASE 4: - * Client probably hasn't noticed that we rebooted yet. - */ - status = nfserr_stale_clientid; - } else { - /* check that we have hit one of the cases...*/ - status = nfserr_clid_inuse; + move_to_confirmed(unconf); + nfsd4_probe_callback(unconf); } out: nfs4_unlock_state(); return status; } -/* OPEN Share state helper functions */ -static inline struct nfs4_file * -alloc_init_file(struct inode *ino) +static struct nfs4_file *nfsd4_alloc_file(void) { - struct nfs4_file *fp; - unsigned int hashval = file_hashval(ino); - - fp = kmem_cache_alloc(file_slab, GFP_KERNEL); - if (fp) { - kref_init(&fp->fi_ref); - INIT_LIST_HEAD(&fp->fi_hash); - INIT_LIST_HEAD(&fp->fi_stateids); - INIT_LIST_HEAD(&fp->fi_delegations); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); - fp->fi_inode = igrab(ino); - fp->fi_id = current_fileid++; - fp->fi_had_conflict = false; - return fp; - } - return NULL; + return kmem_cache_alloc(file_slab, GFP_KERNEL); } -static void -nfsd4_free_slab(struct kmem_cache **slab) +/* OPEN Share state helper functions */ +static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) { - if (*slab == NULL) - return; - kmem_cache_destroy(*slab); - *slab = NULL; + unsigned int hashval = file_hashval(ino); + + atomic_set(&fp->fi_ref, 1); + INIT_LIST_HEAD(&fp->fi_stateids); + INIT_LIST_HEAD(&fp->fi_delegations); + fp->fi_inode = igrab(ino); + fp->fi_had_conflict = false; + fp->fi_lease = NULL; + memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); + memset(fp->fi_access, 0, sizeof(fp->fi_access)); + spin_lock(&state_lock); + hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&state_lock); } void nfsd4_free_slabs(void) { - nfsd4_free_slab(&stateowner_slab); - nfsd4_free_slab(&file_slab); - nfsd4_free_slab(&stateid_slab); - nfsd4_free_slab(&deleg_slab); + kmem_cache_destroy(openowner_slab); + kmem_cache_destroy(lockowner_slab); + kmem_cache_destroy(file_slab); + kmem_cache_destroy(stateid_slab); + kmem_cache_destroy(deleg_slab); } -static int +int nfsd4_init_slabs(void) { - stateowner_slab = kmem_cache_create("nfsd4_stateowners", - sizeof(struct nfs4_stateowner), 0, 0, NULL); - if (stateowner_slab == NULL) - goto out_nomem; + openowner_slab = kmem_cache_create("nfsd4_openowners", + sizeof(struct nfs4_openowner), 0, 0, NULL); + if (openowner_slab == NULL) + goto out; + lockowner_slab = kmem_cache_create("nfsd4_lockowners", + sizeof(struct nfs4_lockowner), 0, 0, NULL); + if (lockowner_slab == NULL) + goto out_free_openowner_slab; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) - goto out_nomem; + goto out_free_lockowner_slab; stateid_slab = kmem_cache_create("nfsd4_stateids", - sizeof(struct nfs4_stateid), 0, 0, NULL); + sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) - goto out_nomem; + goto out_free_file_slab; deleg_slab = kmem_cache_create("nfsd4_delegations", sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) - goto out_nomem; + goto out_free_stateid_slab; return 0; -out_nomem: - nfsd4_free_slabs(); + +out_free_stateid_slab: + kmem_cache_destroy(stateid_slab); +out_free_file_slab: + kmem_cache_destroy(file_slab); +out_free_lockowner_slab: + kmem_cache_destroy(lockowner_slab); +out_free_openowner_slab: + kmem_cache_destroy(openowner_slab); +out: dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } -void -nfs4_free_stateowner(struct kref *kref) +static void init_nfs4_replay(struct nfs4_replay *rp) { - struct nfs4_stateowner *sop = - container_of(kref, struct nfs4_stateowner, so_ref); - kfree(sop->so_owner.data); - kmem_cache_free(stateowner_slab, sop); + rp->rp_status = nfserr_serverfault; + rp->rp_buflen = 0; + rp->rp_buf = rp->rp_ibuf; } -static inline struct nfs4_stateowner * -alloc_stateowner(struct xdr_netobj *owner) +static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) { struct nfs4_stateowner *sop; - if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { - if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { - memcpy(sop->so_owner.data, owner->data, owner->len); - sop->so_owner.len = owner->len; - kref_init(&sop->so_ref); - return sop; - } - kmem_cache_free(stateowner_slab, sop); - } - return NULL; -} - -static struct nfs4_stateowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { - struct nfs4_stateowner *sop; - struct nfs4_replay *rp; - unsigned int idhashval; + sop = kmem_cache_alloc(slab, GFP_KERNEL); + if (!sop) + return NULL; - if (!(sop = alloc_stateowner(&open->op_owner))) + sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + if (!sop->so_owner.data) { + kmem_cache_free(slab, sop); return NULL; - idhashval = ownerid_hashval(current_ownerid); - INIT_LIST_HEAD(&sop->so_idhash); - INIT_LIST_HEAD(&sop->so_strhash); - INIT_LIST_HEAD(&sop->so_perclient); + } + sop->so_owner.len = owner->len; + INIT_LIST_HEAD(&sop->so_stateids); - INIT_LIST_HEAD(&sop->so_perstateid); /* not used */ - INIT_LIST_HEAD(&sop->so_close_lru); - sop->so_time = 0; - list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); - list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perclient, &clp->cl_openowners); - sop->so_is_open_owner = 1; - sop->so_id = current_ownerid++; sop->so_client = clp; - sop->so_seqid = open->op_seqid; - sop->so_confirmed = 0; - rp = &sop->so_replay; - rp->rp_status = nfserr_serverfault; - rp->rp_buflen = 0; - rp->rp_buf = rp->rp_ibuf; + init_nfs4_replay(&sop->so_replay); return sop; } -static void -release_stateid_lockowners(struct nfs4_stateid *open_stp) +static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - struct nfs4_stateowner *lock_sop; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - while (!list_empty(&open_stp->st_lockowners)) { - lock_sop = list_entry(open_stp->st_lockowners.next, - struct nfs4_stateowner, so_perstateid); - /* list_del(&open_stp->st_lockowners); */ - BUG_ON(lock_sop->so_is_open_owner); - release_stateowner(lock_sop); - } + list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_perclient, &clp->cl_openowners); } -static void -unhash_stateowner(struct nfs4_stateowner *sop) -{ - struct nfs4_stateid *stp; - - list_del(&sop->so_idhash); - list_del(&sop->so_strhash); - if (sop->so_is_open_owner) - list_del(&sop->so_perclient); - list_del(&sop->so_perstateid); - while (!list_empty(&sop->so_stateids)) { - stp = list_entry(sop->so_stateids.next, - struct nfs4_stateid, st_perstateowner); - if (sop->so_is_open_owner) - release_stateid(stp, OPEN_STATE); - else - release_stateid(stp, LOCK_STATE); - } -} +static struct nfs4_openowner * +alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { + struct nfs4_openowner *oo; -static void -release_stateowner(struct nfs4_stateowner *sop) -{ - unhash_stateowner(sop); - list_del(&sop->so_close_lru); - nfs4_put_stateowner(sop); + oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!oo) + return NULL; + oo->oo_owner.so_is_open_owner = 1; + oo->oo_owner.so_seqid = open->op_seqid; + oo->oo_flags = NFS4_OO_NEW; + oo->oo_time = 0; + oo->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&oo->oo_close_lru); + hash_openowner(oo, clp, strhashval); + return oo; } -static inline void -init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { - struct nfs4_stateowner *sop = open->op_stateowner; - unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); +static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { + struct nfs4_openowner *oo = open->op_openowner; - INIT_LIST_HEAD(&stp->st_hash); - INIT_LIST_HEAD(&stp->st_perstateowner); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); - INIT_LIST_HEAD(&stp->st_perfile); - list_add(&stp->st_hash, &stateid_hashtbl[hashval]); - list_add(&stp->st_perstateowner, &sop->so_stateids); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); - stp->st_stateowner = sop; + stp->st_stateowner = &oo->oo_owner; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; - stp->st_stateid.si_stateownerid = sop->so_id; - stp->st_stateid.si_fileid = fp->fi_id; - stp->st_stateid.si_generation = 0; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - __set_bit(open->op_share_access, &stp->st_access_bmap); - __set_bit(open->op_share_deny, &stp->st_deny_bmap); + set_access(open->op_share_access, stp); + set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; } static void -release_stateid(struct nfs4_stateid *stp, int flags) +move_to_close_lru(struct nfs4_openowner *oo, struct net *net) { - struct file *filp = stp->st_vfs_file; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - list_del(&stp->st_hash); - list_del(&stp->st_perfile); - list_del(&stp->st_perstateowner); - if (flags & OPEN_STATE) { - release_stateid_lockowners(stp); - stp->st_vfs_file = NULL; - nfsd_close(filp); - } else if (flags & LOCK_STATE) - locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); - put_nfs4_file(stp->st_file); - kmem_cache_free(stateid_slab, stp); -} + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); -static void -move_to_close_lru(struct nfs4_stateowner *sop) -{ - dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); - - list_move_tail(&sop->so_close_lru, &close_lru); - sop->so_time = get_seconds(); + list_move_tail(&oo->oo_close_lru, &nn->close_lru); + oo->oo_time = get_seconds(); } static int @@ -1129,14 +2758,25 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, (sop->so_client->cl_clientid.cl_id == clid->cl_id); } -static struct nfs4_stateowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) +static struct nfs4_openowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) { - struct nfs4_stateowner *so = NULL; + struct nfs4_stateowner *so; + struct nfs4_openowner *oo; + struct nfs4_client *clp; - list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) - return so; + list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; + if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { + oo = openowner(so); + clp = oo->oo_owner.so_client; + if ((bool)clp->cl_minorversion != sessions) + return NULL; + renew_client(oo->oo_owner.so_client); + return oo; + } } return NULL; } @@ -1148,63 +2788,18 @@ find_file(struct inode *ino) unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + spin_lock(&state_lock); + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); + spin_unlock(&state_lock); return fp; } } + spin_unlock(&state_lock); return NULL; } -static inline int access_valid(u32 x) -{ - if (x < NFS4_SHARE_ACCESS_READ) - return 0; - if (x > NFS4_SHARE_ACCESS_BOTH) - return 0; - return 1; -} - -static inline int deny_valid(u32 x) -{ - /* Note: unlike access bits, deny bits may be zero. */ - return x <= NFS4_SHARE_DENY_BOTH; -} - -static void -set_access(unsigned int *access, unsigned long bmap) { - int i; - - *access = 0; - for (i = 1; i < 4; i++) { - if (test_bit(i, &bmap)) - *access |= i; - } -} - -static void -set_deny(unsigned int *deny, unsigned long bmap) { - int i; - - *deny = 0; - for (i = 0; i < 4; i++) { - if (test_bit(i, &bmap)) - *deny |= i ; - } -} - -static int -test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - set_access(&access, stp->st_access_bmap); - set_deny(&deny, stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return 0; - return 1; -} - /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid @@ -1214,19 +2809,17 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; __be32 ret; - dprintk("NFSD: nfs4_share_conflict\n"); - fp = find_file(ino); if (!fp) return nfs_ok; ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_bit(deny_type, &stp->st_deny_bmap) || - test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + if (test_deny(deny_type, stp) || + test_deny(NFS4_SHARE_DENY_BOTH, stp)) goto out; } ret = nfs_ok; @@ -1235,131 +2828,57 @@ out: return ret; } -static inline void -nfs4_file_downgrade(struct file *filp, unsigned int share_access) -{ - if (share_access & NFS4_SHARE_ACCESS_WRITE) { - put_write_access(filp->f_path.dentry->d_inode); - filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; - } -} - -/* - * Recall a delegation - */ -static int -do_recall(void *__dp) -{ - struct nfs4_delegation *dp = __dp; - - dp->dl_file->fi_had_conflict = true; - nfsd4_cb_recall(dp); - return 0; -} - -/* - * Spawn a thread to perform a recall on the delegation represented - * by the lease (file_lock) - * - * Called from break_lease() with lock_kernel() held. - * Note: we assume break_lease will only call this *once* for any given - * lease. - */ -static -void nfsd_break_deleg_cb(struct file_lock *fl) +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { - struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; - struct task_struct *t; - - dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); - if (!dp) - return; + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&state_lock); /* We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel * lock) we know the server hasn't removed the lease yet, we know * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - atomic_inc(&dp->dl_client->cl_count); - spin_lock(&recall_lock); - list_add_tail(&dp->dl_recall_lru, &del_recall_lru); - spin_unlock(&recall_lock); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - /* only place dl_time is set. protected by lock_kernel*/ + /* Only place dl_time is set; protected by i_lock: */ dp->dl_time = get_seconds(); - /* - * We don't want the locks code to timeout the lease for us; - * we'll remove it ourself if the delegation isn't returned - * in time. - */ - fl->fl_break_time = 0; + block_delegations(&dp->dl_fh); - t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); - if (IS_ERR(t)) { - struct nfs4_client *clp = dp->dl_client; - - printk(KERN_INFO "NFSD: Callback thread failed for " - "for client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - put_nfs4_client(dp->dl_client); - nfs4_put_delegation(dp); - } + nfsd4_cb_recall(dp); } -/* - * The file_lock is being reapd. - * - * Called by locks_free_lock() with lock_kernel() held. - */ -static -void nfsd_release_deleg_cb(struct file_lock *fl) +/* Called from break_lease() with i_lock held. */ +static void nfsd_break_deleg_cb(struct file_lock *fl) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; - - dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count)); + struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; + struct nfs4_delegation *dp; - if (!(fl->fl_flags & FL_LEASE) || !dp) + if (!fp) { + WARN(1, "(%p)->fl_owner NULL\n", fl); return; - dp->dl_flock = NULL; -} - -/* - * Set the delegation file_lock back pointer. - * - * Called from setlease() with lock_kernel() held. - */ -static -void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) -{ - struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner; - - dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp); - if (!dp) + } + if (fp->fi_had_conflict) { + WARN(1, "duplicate break on %p\n", fp); return; - dp->dl_flock = new; -} - -/* - * Called from setlease() with lock_kernel() held - */ -static -int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) -{ - struct nfs4_delegation *onlistd = - (struct nfs4_delegation *)onlist->fl_owner; - struct nfs4_delegation *tryd = - (struct nfs4_delegation *)try->fl_owner; - - if (onlist->fl_lmops != try->fl_lmops) - return 0; + } + /* + * We don't want the locks code to timeout the lease for us; + * we'll remove it ourself if a delegation isn't returned + * in time: + */ + fl->fl_break_time = 0; - return onlistd->dl_client == tryd->dl_client; + spin_lock(&state_lock); + fp->fi_had_conflict = true; + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); + spin_unlock(&state_lock); } - static int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) { @@ -1369,69 +2888,73 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) return -EAGAIN; } -static struct lock_manager_operations nfsd_lease_mng_ops = { - .fl_break = nfsd_break_deleg_cb, - .fl_release_private = nfsd_release_deleg_cb, - .fl_copy_lock = nfsd_copy_lock_deleg_cb, - .fl_mylease = nfsd_same_client_deleg_cb, - .fl_change = nfsd_change_deleg_cb, +static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_break = nfsd_break_deleg_cb, + .lm_change = nfsd_change_deleg_cb, }; +static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) +{ + if (nfsd4_has_session(cstate)) + return nfs_ok; + if (seqid == so->so_seqid - 1) + return nfserr_replay_me; + if (seqid == so->so_seqid) + return nfs_ok; + return nfserr_bad_seqid; +} __be32 -nfsd4_process_open1(struct nfsd4_open *open) +nfsd4_process_open1(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, struct nfsd_net *nn) { clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; unsigned int strhashval; - struct nfs4_stateowner *sop = NULL; - - if (!check_name(open->op_owner)) - return nfserr_inval; + struct nfs4_openowner *oo = NULL; + __be32 status; - if (STALE_CLIENTID(&open->op_clientid)) + if (STALE_CLIENTID(&open->op_clientid, nn)) return nfserr_stale_clientid; - - strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); - sop = find_openstateowner_str(strhashval, open); - open->op_stateowner = sop; - if (!sop) { - /* Make sure the client's lease hasn't expired. */ - clp = find_confirmed_client(clientid); + /* + * In case we need it later, after we've already created the + * file and don't want to risk a further failure: + */ + open->op_file = nfsd4_alloc_file(); + if (open->op_file == NULL) + return nfserr_jukebox; + + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); + oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); + open->op_openowner = oo; + if (!oo) { + clp = find_confirmed_client(clientid, cstate->minorversion, + nn); if (clp == NULL) return nfserr_expired; - goto renew; + goto new_owner; } - if (!sop->so_confirmed) { + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ - clp = sop->so_client; - release_stateowner(sop); - open->op_stateowner = NULL; - goto renew; - } - if (open->op_seqid == sop->so_seqid - 1) { - if (sop->so_replay.rp_buflen) - return nfserr_replay_me; - /* The original OPEN failed so spectacularly - * that we don't even have replay data saved! - * Therefore, we have no choice but to continue - * processing this OPEN; presumably, we'll - * fail again for the same reason. - */ - dprintk("nfsd4_process_open1: replay with no replay cache\n"); - goto renew; - } - if (open->op_seqid != sop->so_seqid) - return nfserr_bad_seqid; -renew: - if (open->op_stateowner == NULL) { - sop = alloc_init_open_stateowner(strhashval, clp, open); - if (sop == NULL) - return nfserr_resource; - open->op_stateowner = sop; - } - list_del_init(&sop->so_close_lru); - renew_client(sop->so_client); + clp = oo->oo_owner.so_client; + release_openowner(oo); + open->op_openowner = NULL; + goto new_owner; + } + status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); + if (status) + return status; + clp = oo->oo_owner.so_client; + goto alloc_stateid; +new_owner: + oo = alloc_init_open_stateowner(strhashval, clp, open); + if (oo == NULL) + return nfserr_jukebox; + open->op_openowner = oo; +alloc_stateid: + open->op_stp = nfs4_alloc_stateid(clp); + if (!open->op_stp) + return nfserr_jukebox; return nfs_ok; } @@ -1444,98 +2967,97 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) return nfs_ok; } -static struct nfs4_delegation * -find_delegation_file(struct nfs4_file *fp, stateid_t *stid) +static int share_access_to_flags(u32 share_access) { - struct nfs4_delegation *dp; + return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; +} - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { - if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) - return dp; - } - return NULL; +static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) +{ + struct nfs4_stid *ret; + + ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + if (!ret) + return NULL; + return delegstateid(ret); +} + +static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) +{ + return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH; } static __be32 -nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, +nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; __be32 status = nfserr_bad_stateid; - *dp = find_delegation_file(fp, &open->op_delegate_stateid); + *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); if (*dp == NULL) goto out; - flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ? - RD_STATE : WR_STATE; + flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(*dp, flags); if (status) *dp = NULL; out: - if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR) + if (!nfsd4_is_deleg_cur(open)) return nfs_ok; if (status) return status; - open->op_stateowner->so_confirmed = 1; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; return nfs_ok; } static __be32 -nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) +nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) { - struct nfs4_stateid *local; - __be32 status = nfserr_share_denied; - struct nfs4_stateowner *sop = open->op_stateowner; + struct nfs4_ol_stateid *local; + struct nfs4_openowner *oo = open->op_openowner; list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; /* remember if we have seen this open owner */ - if (local->st_stateowner == sop) + if (local->st_stateowner == &oo->oo_owner) *stpp = local; /* check for conflicting share reservations */ if (!test_share(local, open)) - goto out; + return nfserr_share_denied; } - status = 0; -out: - return status; + return nfs_ok; } -static inline struct nfs4_stateid * -nfs4_alloc_stateid(void) +static inline int nfs4_access_to_access(u32 nfs4_access) { - return kmem_cache_alloc(stateid_slab, GFP_KERNEL); + int flags = 0; + + if (nfs4_access & NFS4_SHARE_ACCESS_READ) + flags |= NFSD_MAY_READ; + if (nfs4_access & NFS4_SHARE_ACCESS_WRITE) + flags |= NFSD_MAY_WRITE; + return flags; } -static __be32 -nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, - struct nfs4_delegation *dp, - struct svc_fh *cur_fh, int flags) +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfsd4_open *open) { - struct nfs4_stateid *stp; - - stp = nfs4_alloc_stateid(); - if (stp == NULL) - return nfserr_resource; + __be32 status; + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); - if (dp) { - get_file(dp->dl_vfs_file); - stp->st_vfs_file = dp->dl_vfs_file; - } else { - __be32 status; - status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, - &stp->st_vfs_file); - if (status) { - if (status == nfserr_dropit) - status = nfserr_jukebox; - kmem_cache_free(stateid_slab, stp); + if (!fp->fi_fds[oflag]) { + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, + &fp->fi_fds[oflag]); + if (status) return status; - } } - *stpp = stp; - return 0; + nfs4_file_get_access(fp, oflag); + + return nfs_ok; } static inline __be32 @@ -1554,119 +3076,229 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - struct file *filp = stp->st_vfs_file; - struct inode *inode = filp->f_path.dentry->d_inode; - unsigned int share_access, new_writer; + u32 op_share_access = open->op_share_access; + bool new_access; __be32 status; - set_access(&share_access, stp->st_access_bmap); - new_writer = (~share_access) & open->op_share_access - & NFS4_SHARE_ACCESS_WRITE; - - if (new_writer) { - int err = get_write_access(inode); - if (err) - return nfserrno(err); + new_access = !test_access(op_share_access, stp); + if (new_access) { + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); + if (status) + return status; } status = nfsd4_truncate(rqstp, cur_fh, open); if (status) { - if (new_writer) - put_write_access(inode); + if (new_access) { + int oflag = nfs4_access_to_omode(op_share_access); + nfs4_file_put_access(fp, oflag); + } return status; } /* remember the open */ - filp->f_mode |= open->op_share_access; - set_bit(open->op_share_access, &stp->st_access_bmap); - set_bit(open->op_share_deny, &stp->st_deny_bmap); + set_access(op_share_access, stp); + set_deny(open->op_share_deny, stp); return nfs_ok; } static void -nfs4_set_claim_prev(struct nfsd4_open *open) +nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) +{ + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; +} + +/* Should we give out recallable state?: */ +static bool nfsd4_cb_channel_good(struct nfs4_client *clp) +{ + if (clp->cl_cb_state == NFSD4_CB_UP) + return true; + /* + * In the sessions case, since we don't have to establish a + * separate connection for callbacks, we assume it's OK + * until we hear otherwise: + */ + return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; +} + +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +{ + struct file_lock *fl; + + fl = locks_alloc_lock(); + if (!fl) + return NULL; + locks_init_lock(fl); + fl->fl_lmops = &nfsd_lease_mng_ops; + fl->fl_flags = FL_DELEG; + fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->fl_end = OFFSET_MAX; + fl->fl_owner = (fl_owner_t)(dp->dl_file); + fl->fl_pid = current->tgid; + return fl; +} + +static int nfs4_setlease(struct nfs4_delegation *dp) { - open->op_stateowner->so_confirmed = 1; - open->op_stateowner->so_client->cl_firststate = 1; + struct nfs4_file *fp = dp->dl_file; + struct file_lock *fl; + int status; + + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + if (!fl) + return -ENOMEM; + fl->fl_file = find_readable_file(fp); + status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); + if (status) + goto out_free; + fp->fi_lease = fl; + fp->fi_deleg_file = get_file(fl->fl_file); + atomic_set(&fp->fi_delegees, 1); + spin_lock(&state_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); + return 0; +out_free: + locks_free_lock(fl); + return status; +} + +static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + if (fp->fi_had_conflict) + return -EAGAIN; + get_nfs4_file(fp); + dp->dl_file = fp; + if (!fp->fi_lease) + return nfs4_setlease(dp); + spin_lock(&state_lock); + atomic_inc(&fp->fi_delegees); + if (fp->fi_had_conflict) { + spin_unlock(&state_lock); + return -EAGAIN; + } + hash_delegation_locked(dp, fp); + spin_unlock(&state_lock); + return 0; +} + +static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) +{ + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (status == -EAGAIN) + open->op_why_no_deleg = WND4_CONTENTION; + else { + open->op_why_no_deleg = WND4_RESOURCE; + switch (open->op_deleg_want) { + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + break; + case NFS4_SHARE_WANT_CANCEL: + open->op_why_no_deleg = WND4_CANCELLED; + break; + case NFS4_SHARE_WANT_NO_DELEG: + WARN_ON_ONCE(1); + } + } } /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp) +nfs4_open_delegation(struct net *net, struct svc_fh *fh, + struct nfsd4_open *open, struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; - struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_callback *cb = &sop->so_client->cl_callback; - struct file_lock fl, *flp = &fl; - int status, flag = 0; + struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); + int cb_up; + int status = 0; - flag = NFS4_OPEN_DELEGATE_NONE; + cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: - if (!atomic_read(&cb->cb_set)) + if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ - if (nfs4_in_grace()) - goto out; - if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) - goto out; + case NFS4_OPEN_CLAIM_FH: + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ + if (locks_in_grace(net)) + goto out_no_deleg; + if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) + goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); + if (dp == NULL) + goto out_no_deleg; + status = nfs4_set_delegation(dp, stp->st_file); + if (status) + goto out_free; - dp = alloc_init_deleg(sop->so_client, stp, fh, flag); - if (dp == NULL) { - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; - } - locks_init_lock(&fl); - fl.fl_lmops = &nfsd_lease_mng_ops; - fl.fl_flags = FL_LEASE; - fl.fl_end = OFFSET_MAX; - fl.fl_owner = (fl_owner_t)dp; - fl.fl_file = stp->st_vfs_file; - fl.fl_pid = current->tgid; + memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); - /* vfs_setlease checks to see if delegation should be handed out. - * the lock_manager callbacks fl_mylease and fl_change are used - */ - if ((status = vfs_setlease(stp->st_vfs_file, - flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { - dprintk("NFSD: setlease failed [%d], no delegation\n", status); - unhash_delegation(dp); - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", + STATEID_VAL(&dp->dl_stid.sc_stateid)); + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + return; +out_free: + destroy_delegation(dp); +out_no_deleg: + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; } - memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; +} - dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", - dp->dl_stateid.si_boot, - dp->dl_stateid.si_stateownerid, - dp->dl_stateid.si_fileid, - dp->dl_stateid.si_generation); -out: - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS - && flag == NFS4_OPEN_DELEGATE_NONE - && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - open->op_delegate_type = flag; +static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, + struct nfs4_delegation *dp) +{ + if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } + /* Otherwise the client must be confused wanting a delegation + * it already has, therefore we don't return + * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + */ } /* @@ -1675,16 +3307,14 @@ out: __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct inode *ino = current_fh->fh_dentry->d_inode; - struct nfs4_stateid *stp = NULL; + struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; - status = nfserr_inval; - if (!access_valid(open->op_share_access) - || !deny_valid(open->op_share_deny)) - goto out; /* * Lookup file; if found, lookup stateid and check open request, * and check for delegations in the process of being recalled. @@ -1694,17 +3324,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; - status = nfs4_check_deleg(fp, open, &dp); + status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; } else { status = nfserr_bad_stateid; - if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) - goto out; - status = nfserr_resource; - fp = alloc_init_file(ino); - if (fp == NULL) + if (nfsd4_is_deleg_cur(open)) goto out; + status = nfserr_jukebox; + fp = open->op_file; + open->op_file = NULL; + nfsd4_init_file(fp, ino); } /* @@ -1713,58 +3343,96 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ - status = nfs4_upgrade_open(rqstp, current_fh, stp, open); + status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) goto out; - update_stateid(&stp->st_stateid); } else { - /* Stateid was not found, this is a new OPEN */ - int flags = 0; - if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - flags |= MAY_READ; - if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flags |= MAY_WRITE; - status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); if (status) goto out; - init_stateid(stp, fp, open); status = nfsd4_truncate(rqstp, current_fh, open); - if (status) { - release_stateid(stp, OPEN_STATE); + if (status) goto out; + stp = open->op_stp; + open->op_stp = NULL; + init_open_stateid(stp, fp, open); + } + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + + if (nfsd4_has_session(&resp->cstate)) { + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + + if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_WANTED; + goto nodeleg; } } - memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); /* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(current_fh, open, stp); - + nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); +nodeleg: status = nfs_ok; - dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", - stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, - stp->st_stateid.si_fileid, stp->st_stateid.si_generation); + dprintk("%s: stateid=" STATEID_FMT "\n", __func__, + STATEID_VAL(&stp->st_stid.sc_stateid)); out: + /* 4.1 client trying to upgrade/downgrade delegation? */ + if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + open->op_deleg_want) + nfsd4_deleg_xgrade_none_ext(open, dp); + if (fp) put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - nfs4_set_claim_prev(open); + nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate)); /* * To finish the open response, we just need to set the rflags. */ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; - if (!open->op_stateowner->so_confirmed) + if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && + !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; return status; } -static struct workqueue_struct *laundry_wq; -static void laundromat_main(struct work_struct *); -static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main); +void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +{ + if (open->op_openowner) { + struct nfs4_openowner *oo = open->op_openowner; + + if (!list_empty(&oo->oo_owner.so_stateids)) + list_del_init(&oo->oo_close_lru); + if (oo->oo_flags & NFS4_OO_NEW) { + if (status) { + release_openowner(oo); + open->op_openowner = NULL; + } else + oo->oo_flags &= ~NFS4_OO_NEW; + } + } + if (open->op_file) + nfsd4_free_file(open->op_file); + if (open->op_stp) + free_generic_stateid(open->op_stp); +} + +static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) +{ + struct nfs4_client *found; + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + found = find_confirmed_client(clid, session, nn); + if (clp) + *clp = found; + return found ? nfs_ok : nfserr_expired; +} __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1772,24 +3440,17 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_client *clp; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) - goto out; - clp = find_confirmed_client(clid); - status = nfserr_expired; - if (clp == NULL) { - /* We assume the client took too long to RENEW. */ - dprintk("nfsd4_renew: clientid not found!\n"); + status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + if (status) goto out; - } - renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_callback.cb_set)) + && clp->cl_cb_state != NFSD4_CB_UP) goto out; status = nfs_ok; out: @@ -1798,145 +3459,142 @@ out: } static void -end_grace(void) +nfsd4_end_grace(struct nfsd_net *nn) { + /* do nothing if grace period already ended */ + if (nn->grace_ended) + return; + dprintk("NFSD: end of grace period\n"); - nfsd4_recdir_purge_old(); - in_grace = 0; + nn->grace_ended = true; + nfsd4_record_grace_done(nn, nn->boot_time); + locks_end_grace(&nn->nfsd4_manager); + /* + * Now that every NFSv4 client has had the chance to recover and + * to see the (possibly new, possibly shorter) lease time, we + * can safely set the next grace time to the current lease time: + */ + nn->nfsd4_grace = nn->nfsd4_lease; } static time_t -nfs4_laundromat(void) +nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; - struct nfs4_stateowner *sop; + struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - NFSD_LEASE_TIME; - time_t t, clientid_val = NFSD_LEASE_TIME; - time_t u, test_val = NFSD_LEASE_TIME; + time_t cutoff = get_seconds() - nn->nfsd4_lease; + time_t t, new_timeo = nn->nfsd4_lease; nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - if (in_grace) - end_grace(); - list_for_each_safe(pos, next, &client_lru) { + nfsd4_end_grace(nn); + INIT_LIST_HEAD(&reaplist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = clp->cl_time - cutoff; - if (clientid_val > t) - clientid_val = t; + new_timeo = min(new_timeo, t); break; } + if (mark_client_expired_locked(clp)) { + dprintk("NFSD: client in use (clientid %08x)\n", + clp->cl_clientid.cl_id); + continue; + } + list_move(&clp->cl_lru, &reaplist); + } + spin_unlock(&nn->client_lock); + list_for_each_safe(pos, next, &reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); - nfsd4_remove_clid_dir(clp); expire_client(clp); } - INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn) + continue; if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { - u = dp->dl_time - cutoff; - if (test_val > u) - test_val = u; + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; } - dprintk("NFSD: purging unused delegation dp %p, fp %p\n", - dp, dp->dl_flock); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); - unhash_delegation(dp); + revoke_delegation(dp); } - test_val = NFSD_LEASE_TIME; - list_for_each_safe(pos, next, &close_lru) { - sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); - if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { - u = sop->so_time - cutoff; - if (test_val > u) - test_val = u; + list_for_each_safe(pos, next, &nn->close_lru) { + oo = container_of(pos, struct nfs4_openowner, oo_close_lru); + if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; } - dprintk("NFSD: purging unused open stateowner (so_id %d)\n", - sop->so_id); - release_stateowner(sop); + release_openowner(oo); } - if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) - clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); - return clientid_val; + return new_timeo; } -void -laundromat_main(struct work_struct *not_used) +static struct workqueue_struct *laundry_wq; +static void laundromat_main(struct work_struct *); + +static void +laundromat_main(struct work_struct *laundry) { time_t t; + struct delayed_work *dwork = container_of(laundry, struct delayed_work, + work); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + laundromat_work); - t = nfs4_laundromat(); + t = nfs4_laundromat(nn); dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); - queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); -} - -static struct nfs4_stateowner * -search_close_lru(u32 st_id, int flags) -{ - struct nfs4_stateowner *local = NULL; - - if (flags & CLOSE_STATE) { - list_for_each_entry(local, &close_lru, so_close_lru) { - if (local->so_id == st_id) - return local; - } - } - return NULL; -} - -static inline int -nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) -{ - return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode; + queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static int -STALE_STATEID(stateid_t *stateid) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - if (stateid->si_boot == boot_time) - return 0; - dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, - stateid->si_generation); - return 1; + if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + return nfserr_bad_stateid; + return nfs_ok; } static inline int -access_permit_read(unsigned long access_bmap) +access_permit_read(struct nfs4_ol_stateid *stp) { - return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); + return test_access(NFS4_SHARE_ACCESS_READ, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp) || + test_access(NFS4_SHARE_ACCESS_WRITE, stp); } static inline int -access_permit_write(unsigned long access_bmap) +access_permit_write(struct nfs4_ol_stateid *stp) { - return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || - test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); + return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp); } static -__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) +__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) { __be32 status = nfserr_openmode; - if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) + /* For lock stateid's, we test the parent open, not the lock: */ + if (stp->st_openstp) + stp = stp->st_openstp; + if ((flags & WR_STATE) && !access_permit_write(stp)) goto out; - if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) + if ((flags & RD_STATE) && !access_permit_read(stp)) goto out; status = nfs_ok; out: @@ -1944,15 +3602,12 @@ out: } static inline __be32 -check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) +check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags) { - /* Trying to call delegreturn with a special stateid? Yuch: */ - if (!(flags & (RD_STATE | WR_STATE))) - return nfserr_bad_stateid; - else if (ONE_STATEID(stateid) && (flags & RD_STATE)) + if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (nfs4_in_grace()) { - /* Answer in remaining cases depends on existance of + else if (locks_in_grace(net)) { + /* Answer in remaining cases depends on existence of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; } else if (flags & WR_STATE) @@ -1968,85 +3623,266 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) * that are not able to provide mandatory locking. */ static inline int -io_during_grace_disallowed(struct inode *inode, int flags) +grace_disallows_io(struct net *net, struct inode *inode) +{ + return locks_in_grace(net) && mandatory_lock(inode); +} + +/* Returns true iff a is later than b: */ +static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) - && mandatory_lock(inode); + return (s32)(a->si_generation - b->si_generation) > 0; +} + +static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) +{ + /* + * When sessions are used the stateid generation number is ignored + * when it is zero. + */ + if (has_session && in->si_generation == 0) + return nfs_ok; + + if (in->si_generation == ref->si_generation) + return nfs_ok; + + /* If the client sends us a stateid from the future, it's buggy: */ + if (stateid_generation_after(in, ref)) + return nfserr_bad_stateid; + /* + * However, we could see a stateid from the past, even from a + * non-buggy client. For example, if the client sends a lock + * while some IO is outstanding, the lock may bump si_generation + * while the IO is still in flight. The client could avoid that + * situation by waiting for responses on all the IO requests, + * but better performance may result in retrying IO that + * receives an old_stateid error if requests are rarely + * reordered in flight: + */ + return nfserr_old_stateid; +} + +static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) +{ + struct nfs4_stid *s; + struct nfs4_ol_stateid *ols; + __be32 status; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + /* Client debugging aid. */ + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, + sizeof(addr_str)); + pr_warn_ratelimited("NFSD: client %s testing state ID " + "with incorrect client ID\n", addr_str); + return nfserr_bad_stateid; + } + s = find_stateid(cl, stateid); + if (!s) + return nfserr_bad_stateid; + status = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (status) + return status; + switch (s->sc_type) { + case NFS4_DELEG_STID: + return nfs_ok; + case NFS4_REVOKED_DELEG_STID: + return nfserr_deleg_revoked; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ols = openlockstateid(s); + if (ols->st_stateowner->so_is_open_owner + && !(openowner(ols->st_stateowner)->oo_flags + & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; + default: + printk("unknown stateid type %x\n", s->sc_type); + case NFS4_CLOSED_STID: + return nfserr_bad_stateid; + } +} + +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, bool sessions, + struct nfsd_net *nn) +{ + struct nfs4_client *cl; + __be32 status; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, + nn, &cl); + if (status == nfserr_stale_clientid) { + if (sessions) + return nfserr_bad_stateid; + return nfserr_stale_stateid; + } + if (status) + return status; + *s = find_stateid_by_type(cl, stateid, typemask); + if (!*s) + return nfserr_bad_stateid; + return nfs_ok; } /* * Checks for stateid operations */ __be32 -nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) +nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, + stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stateid *stp = NULL; + struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; - stateid_t *stidp; + struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct file *file = NULL; __be32 status; - dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", - stateid->si_boot, stateid->si_stateownerid, - stateid->si_fileid, stateid->si_generation); if (filpp) *filpp = NULL; - if (io_during_grace_disallowed(ino, flags)) + if (grace_disallows_io(net, ino)) return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return check_special_stateids(current_fh, stateid, flags); + return check_special_stateids(net, current_fh, stateid, flags); - /* STALE STATEID */ - status = nfserr_stale_stateid; - if (STALE_STATEID(stateid)) - goto out; + nfs4_lock_state(); - /* BAD STATEID */ - status = nfserr_bad_stateid; - if (!stateid->si_fileid) { /* delegation stateid */ - if(!(dp = find_delegation_stateid(ino, stateid))) { - dprintk("NFSD: delegation stateid not found\n"); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, cstate->minorversion, nn); + if (status) + goto out; + status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + if (status) + goto out; + switch (s->sc_type) { + case NFS4_DELEG_STID: + dp = delegstateid(s); + status = nfs4_check_delegmode(dp, flags); + if (status) goto out; + if (filpp) { + file = dp->dl_file->fi_deleg_file; + if (!file) { + WARN_ON_ONCE(1); + status = nfserr_serverfault; + goto out; + } } - stidp = &dp->dl_stateid; - } else { /* open or lock stateid */ - if (!(stp = find_stateid(stateid, flags))) { - dprintk("NFSD: open or lock stateid not found\n"); + break; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + stp = openlockstateid(s); + status = nfs4_check_fh(current_fh, stp); + if (status) goto out; - } - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) + if (stp->st_stateowner->so_is_open_owner + && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) goto out; - if (!stp->st_stateowner->so_confirmed) + status = nfs4_check_openmode(stp, flags); + if (status) goto out; - stidp = &stp->st_stateid; - } - if (stateid->si_generation > stidp->si_generation) + if (filpp) { + if (flags & RD_STATE) + file = find_readable_file(stp->st_file); + else + file = find_writeable_file(stp->st_file); + } + break; + default: + status = nfserr_bad_stateid; goto out; + } + status = nfs_ok; + if (file) + *filpp = get_file(file); +out: + nfs4_unlock_state(); + return status; +} + +static __be32 +nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) + return nfserr_locks_held; + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + release_lockowner(lo); + return nfs_ok; +} + +/* + * Test if the stateid is valid + */ +__be32 +nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_test_stateid_id *stateid; + struct nfs4_client *cl = cstate->session->se_client; + + nfs4_lock_state(); + list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) + stateid->ts_id_status = + nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); + nfs4_unlock_state(); + + return nfs_ok; +} + +__be32 +nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_free_stateid *free_stateid) +{ + stateid_t *stateid = &free_stateid->fr_stateid; + struct nfs4_stid *s; + struct nfs4_delegation *dp; + struct nfs4_client *cl = cstate->session->se_client; + __be32 ret = nfserr_bad_stateid; - /* OLD STATEID */ - status = nfserr_old_stateid; - if (stateid->si_generation < stidp->si_generation) + nfs4_lock_state(); + s = find_stateid(cl, stateid); + if (!s) goto out; - if (stp) { - if ((status = nfs4_check_openmode(stp,flags))) - goto out; - renew_client(stp->st_stateowner->so_client); - if (filpp) - *filpp = stp->st_vfs_file; - } else if (dp) { - if ((status = nfs4_check_delegmode(dp, flags))) + switch (s->sc_type) { + case NFS4_DELEG_STID: + ret = nfserr_locks_held; + goto out; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) goto out; - renew_client(dp->dl_client); - if (flags & DELEG_RET) - unhash_delegation(dp); - if (filpp) - *filpp = dp->dl_vfs_file; + if (s->sc_type == NFS4_LOCK_STID) + ret = nfsd4_free_lock_stateid(openlockstateid(s)); + else + ret = nfserr_locks_held; + break; + case NFS4_REVOKED_DELEG_STID: + dp = delegstateid(s); + destroy_revoked_delegation(dp); + ret = nfs_ok; + break; + default: + ret = nfserr_bad_stateid; } - status = nfs_ok; out: - return status; + nfs4_unlock_state(); + return ret; } static inline int @@ -2056,121 +3892,70 @@ setlkflg (int type) RD_STATE : WR_STATE; } +static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) +{ + struct svc_fh *current_fh = &cstate->current_fh; + struct nfs4_stateowner *sop = stp->st_stateowner; + __be32 status; + + status = nfsd4_check_seqid(cstate, sop, seqid); + if (status) + return status; + if (stp->st_stid.sc_type == NFS4_CLOSED_STID + || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID) + /* + * "Closed" stateid's exist *only* to return + * nfserr_replay_me from the previous step, and + * revoked delegations are kept only for free_stateid. + */ + return nfserr_bad_stateid; + status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); + if (status) + return status; + return nfs4_check_fh(current_fh, stp); +} + /* * Checks for sequence id mutating operations. */ static __be32 -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) +nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, char typemask, + struct nfs4_ol_stateid **stpp, + struct nfsd_net *nn) { - struct nfs4_stateid *stp; - struct nfs4_stateowner *sop; + __be32 status; + struct nfs4_stid *s; - dprintk("NFSD: preprocess_seqid_op: seqid=%d " - "stateid = (%08x/%08x/%08x/%08x)\n", seqid, - stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, - stateid->si_generation); + dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, + seqid, STATEID_VAL(stateid)); *stpp = NULL; - *sopp = NULL; - - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - dprintk("NFSD: preprocess_seqid_op: magic stateid!\n"); - return nfserr_bad_stateid; - } - - if (STALE_STATEID(stateid)) - return nfserr_stale_stateid; - /* - * We return BAD_STATEID if filehandle doesn't match stateid, - * the confirmed flag is incorrecly set, or the generation - * number is incorrect. - */ - stp = find_stateid(stateid, flags); - if (stp == NULL) { - /* - * Also, we should make sure this isn't just the result of - * a replayed close: - */ - sop = search_close_lru(stateid->si_stateownerid, flags); - if (sop == NULL) - return nfserr_bad_stateid; - *sopp = sop; - goto check_replay; - } - - *stpp = stp; - *sopp = sop = stp->st_stateowner; - - if (lock) { - clientid_t *lockclid = &lock->v.new.clientid; - struct nfs4_client *clp = sop->so_client; - int lkflg = 0; - __be32 status; - - lkflg = setlkflg(lock->lk_type); - - if (lock->lk_is_new) { - if (!sop->so_is_open_owner) - return nfserr_bad_stateid; - if (!same_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; - /* stp is the open stateid */ - status = nfs4_check_openmode(stp, lkflg); - if (status) - return status; - } else { - /* stp is the lock stateid */ - status = nfs4_check_openmode(stp->st_openstp, lkflg); - if (status) - return status; - } - } + status = nfsd4_lookup_stateid(stateid, typemask, &s, + cstate->minorversion, nn); + if (status) + return status; + *stpp = openlockstateid(s); + if (!nfsd4_has_session(cstate)) + cstate->replay_owner = (*stpp)->st_stateowner; - if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { - dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); - return nfserr_bad_stateid; - } + return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); +} - /* - * We now validate the seqid and stateid generation numbers. - * For the moment, we ignore the possibility of - * generation number wraparound. - */ - if (seqid != sop->so_seqid) - goto check_replay; +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, + stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn) +{ + __be32 status; + struct nfs4_openowner *oo; - if (sop->so_confirmed && flags & CONFIRM) { - dprintk("NFSD: preprocess_seqid_op: expected" - " unconfirmed stateowner!\n"); - return nfserr_bad_stateid; - } - if (!sop->so_confirmed && !(flags & CONFIRM)) { - dprintk("NFSD: preprocess_seqid_op: stateowner not" - " confirmed yet!\n"); - return nfserr_bad_stateid; - } - if (stateid->si_generation > stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: future stateid?!\n"); + status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, + NFS4_OPEN_STID, stpp, nn); + if (status) + return status; + oo = openowner((*stpp)->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) return nfserr_bad_stateid; - } - - if (stateid->si_generation < stp->st_stateid.si_generation) { - dprintk("NFSD: preprocess_seqid_op: old stateid!\n"); - return nfserr_old_stateid; - } - renew_client(sop->so_client); return nfs_ok; - -check_replay: - if (seqid == sop->so_seqid - 1) { - dprintk("NFSD: preprocess_seqid_op: retransmission?\n"); - /* indicate replay to calling function */ - return nfserr_replay_me; - } - dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", - sop->so_seqid, seqid); - *sopp = NULL; - return nfserr_bad_seqid; } __be32 @@ -2178,12 +3963,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open_confirm *oc) { __be32 status; - struct nfs4_stateowner *sop; - struct nfs4_stateid *stp; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_confirm on file %pd\n", + cstate->current_fh.fh_dentry); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -2191,55 +3976,63 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, + status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, - CHECK_FH | CONFIRM | OPEN_STATE, - &oc->oc_stateowner, &stp, NULL))) - goto out; + NFS4_OPEN_STID, &stp, nn); + if (status) + goto out; + oo = openowner(stp->st_stateowner); + status = nfserr_bad_stateid; + if (oo->oo_flags & NFS4_OO_CONFIRMED) + goto out; + oo->oo_flags |= NFS4_OO_CONFIRMED; + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", + __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - sop = oc->oc_stateowner; - sop->so_confirmed = 1; - update_stateid(&stp->st_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); - dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " - "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, - stp->st_stateid.si_boot, - stp->st_stateid.si_stateownerid, - stp->st_stateid.si_fileid, - stp->st_stateid.si_generation); - - nfsd4_create_clid_dir(sop->so_client); + nfsd4_client_record_create(oo->oo_owner.so_client); + status = nfs_ok; out: - if (oc->oc_stateowner) { - nfs4_get_stateowner(oc->oc_stateowner); - cstate->replay_owner = oc->oc_stateowner; - } - nfs4_unlock_state(); + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } +static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) +{ + if (!test_access(access, stp)) + return; + nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + clear_access(access, stp); +} -/* - * unset all bits in union bitmap (bmap) that - * do not exist in share (from successful OPEN_DOWNGRADE) - */ -static void -reset_union_bmap_access(unsigned long access, unsigned long *bmap) +static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) { - int i; - for (i = 1; i < 4; i++) { - if ((i & access) != i) - __clear_bit(i, bmap); + switch (to_access) { + case NFS4_SHARE_ACCESS_READ: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_WRITE: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + WARN_ON_ONCE(1); } } static void -reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) +reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) { int i; for (i = 0; i < 4; i++) { if ((i & deny) != i) - __clear_bit(i, bmap); + clear_deny(i, stp); } } @@ -2249,55 +4042,53 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_open_downgrade *od) { __be32 status; - struct nfs4_stateid *stp; - unsigned int share_access; + struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", + cstate->current_fh.fh_dentry); - if (!access_valid(od->od_share_access) - || !deny_valid(od->od_share_deny)) - return nfserr_inval; + /* We don't yet support WANT bits: */ + if (od->od_deleg_want) + dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, + od->od_deleg_want); nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, - od->od_seqid, - &od->od_stateid, - CHECK_FH | OPEN_STATE, - &od->od_stateowner, &stp, NULL))) + status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, + &od->od_stateid, &stp, nn); + if (status) goto out; - status = nfserr_inval; - if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { - dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", + if (!test_access(od->od_share_access, stp)) { + dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto out; } - if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { + if (!test_deny(od->od_share_deny, stp)) { dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto out; } - set_access(&share_access, stp->st_access_bmap); - nfs4_file_downgrade(stp->st_vfs_file, - share_access & ~od->od_share_access); + nfs4_stateid_downgrade(stp, od->od_share_access); - reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); - reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); + reset_union_bmap_deny(od->od_share_deny, stp); - update_stateid(&stp->st_stateid); - memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; out: - if (od->od_stateowner) { - nfs4_get_stateowner(od->od_stateowner); - cstate->replay_owner = od->od_stateowner; - } - nfs4_unlock_state(); + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } +static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) +{ + unhash_open_stateid(s); + s->st_stid.sc_type = NFS4_CLOSED_STID; +} + /* * nfs4_unlock_state() called after encode */ @@ -2306,39 +4097,47 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_close *close) { __be32 status; - struct nfs4_stateid *stp; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %.*s\n", - (int)cstate->current_fh.fh_dentry->d_name.len, - cstate->current_fh.fh_dentry->d_name.name); + dprintk("NFSD: nfsd4_close on file %pd\n", + cstate->current_fh.fh_dentry); nfs4_lock_state(); - /* check close_lru for replay */ - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, - close->cl_seqid, - &close->cl_stateid, - CHECK_FH | OPEN_STATE | CLOSE_STATE, - &close->cl_stateowner, &stp, NULL))) + status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, + &close->cl_stateid, + NFS4_OPEN_STID|NFS4_CLOSED_STID, + &stp, nn); + nfsd4_bump_seqid(cstate, status); + if (status) goto out; - status = nfs_ok; - update_stateid(&stp->st_stateid); - memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); + oo = openowner(stp->st_stateowner); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - /* release_stateid() calls nfsd_close() if needed */ - release_stateid(stp, OPEN_STATE); + nfsd4_close_open_stateid(stp); - /* place unused nfs4_stateowners on so_close_lru list to be - * released by the laundromat service after the lease period - * to enable us to handle CLOSE replay - */ - if (list_empty(&close->cl_stateowner->so_stateids)) - move_to_close_lru(close->cl_stateowner); -out: - if (close->cl_stateowner) { - nfs4_get_stateowner(close->cl_stateowner); - cstate->replay_owner = close->cl_stateowner; + if (cstate->minorversion) + free_generic_stateid(stp); + else + oo->oo_last_closed_stid = stp; + + if (list_empty(&oo->oo_owner.so_stateids)) { + if (cstate->minorversion) + release_openowner(oo); + else { + /* + * In the 4.0 case we need to keep the owners around a + * little while to handle CLOSE replay. + */ + move_to_close_lru(oo, SVC_NET(rqstp)); + } } - nfs4_unlock_state(); +out: + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } @@ -2346,88 +4145,62 @@ __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *dr) { + struct nfs4_delegation *dp; + stateid_t *stateid = &dr->dr_stateid; + struct nfs4_stid *s; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) - goto out; + return status; nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(&cstate->current_fh, - &dr->dr_stateid, DELEG_RET, NULL); - nfs4_unlock_state(); + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, + cstate->minorversion, nn); + if (status) + goto out; + dp = delegstateid(s); + status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); + if (status) + goto out; + + destroy_delegation(dp); out: + nfs4_unlock_state(); + return status; } -/* - * Lock owner state (byte-range locks) - */ #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCK_HASH_BITS 8 -#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) -#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) -#define lockownerid_hashval(id) \ - ((id) & LOCK_HASH_MASK) +#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) -static inline unsigned int -lock_ownerstr_hashval(struct inode *inode, u32 cl_id, - struct xdr_netobj *ownername) +static inline u64 +end_offset(u64 start, u64 len) { - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCK_HASH_MASK; -} + u64 end; -static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; -static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; -static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; + end = start + len; + return end >= start ? end: NFS4_MAX_UINT64; +} -static struct nfs4_stateid * -find_stateid(stateid_t *stid, int flags) +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) { - struct nfs4_stateid *local = NULL; - u32 st_id = stid->si_stateownerid; - u32 f_id = stid->si_fileid; - unsigned int hashval; + u64 end; - dprintk("NFSD: find_stateid flags 0x%x\n",flags); - if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { - hashval = stateid_hashval(st_id, f_id); - list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { - if ((local->st_stateid.si_stateownerid == st_id) && - (local->st_stateid.si_fileid == f_id)) - return local; - } - } - if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { - hashval = stateid_hashval(st_id, f_id); - list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { - if ((local->st_stateid.si_stateownerid == st_id) && - (local->st_stateid.si_fileid == f_id)) - return local; - } - } - return NULL; + WARN_ON_ONCE(!len); + end = start + len; + return end > start ? end - 1: NFS4_MAX_UINT64; } -static struct nfs4_delegation * -find_delegation_stateid(struct inode *ino, stateid_t *stid) +static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) { - struct nfs4_file *fp; - struct nfs4_delegation *dl; - - dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", - stid->si_boot, stid->si_stateownerid, - stid->si_fileid, stid->si_generation); - - fp = find_file(ino); - if (!fp) - return NULL; - dl = find_delegation_file(fp, stid); - put_nfs4_file(fp); - return dl; + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCKOWNER_INO_HASH_MASK; } /* @@ -2449,129 +4222,176 @@ nfs4_transform_lock_offset(struct file_lock *lock) /* Hack!: For now, we're defining this just so we can use a pointer to it * as a unique cookie to identify our (NFSv4's) posix locks. */ -static struct lock_manager_operations nfsd_posix_mng_ops = { +static const struct lock_manager_operations nfsd_posix_mng_ops = { }; static inline void nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) { - struct nfs4_stateowner *sop; - unsigned int hval; + struct nfs4_lockowner *lo; if (fl->fl_lmops == &nfsd_posix_mng_ops) { - sop = (struct nfs4_stateowner *) fl->fl_owner; - hval = lockownerid_hashval(sop->so_id); - kref_get(&sop->so_ref); - deny->ld_sop = sop; - deny->ld_clientid = sop->so_client->cl_clientid; + lo = (struct nfs4_lockowner *) fl->fl_owner; + deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, + lo->lo_owner.so_owner.len, GFP_KERNEL); + if (!deny->ld_owner.data) + /* We just don't care that much */ + goto nevermind; + deny->ld_owner.len = lo->lo_owner.so_owner.len; + deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { - deny->ld_sop = NULL; +nevermind: + deny->ld_owner.len = 0; + deny->ld_owner.data = NULL; deny->ld_clientid.cl_boot = 0; deny->ld_clientid.cl_id = 0; } deny->ld_start = fl->fl_start; - deny->ld_length = ~(u64)0; - if (fl->fl_end != ~(u64)0) + deny->ld_length = NFS4_MAX_UINT64; + if (fl->fl_end != NFS4_MAX_UINT64) deny->ld_length = fl->fl_end - fl->fl_start + 1; deny->ld_type = NFS4_READ_LT; if (fl->fl_type != F_RDLCK) deny->ld_type = NFS4_WRITE_LT; } -static struct nfs4_stateowner * -find_lockstateowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner) +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + +static struct nfs4_lockowner * +find_lockowner_str(struct inode *inode, clientid_t *clid, + struct xdr_netobj *owner, struct nfsd_net *nn) { - unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); - struct nfs4_stateowner *op; + unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; - list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return op; + list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } +static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) +{ + struct inode *inode = open_stp->st_file->fi_inode; + unsigned int inohash = lockowner_ino_hashval(inode, + clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); + list_add(&lo->lo_perstateid, &open_stp->st_lockowners); +} + /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has - * occured. + * occurred. * - * strhashval = lock_ownerstr_hashval + * strhashval = ownerstr_hashval */ -static struct nfs4_stateowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfs4_stateowner *sop; - struct nfs4_replay *rp; - unsigned int idhashval; +static struct nfs4_lockowner * +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { + struct nfs4_lockowner *lo; - if (!(sop = alloc_stateowner(&lock->lk_new_owner))) + lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); + if (!lo) return NULL; - idhashval = lockownerid_hashval(current_ownerid); - INIT_LIST_HEAD(&sop->so_idhash); - INIT_LIST_HEAD(&sop->so_strhash); - INIT_LIST_HEAD(&sop->so_perclient); - INIT_LIST_HEAD(&sop->so_stateids); - INIT_LIST_HEAD(&sop->so_perstateid); - INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ - sop->so_time = 0; - list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); - list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perstateid, &open_stp->st_lockowners); - sop->so_is_open_owner = 0; - sop->so_id = current_ownerid++; - sop->so_client = clp; + INIT_LIST_HEAD(&lo->lo_owner.so_stateids); + lo->lo_owner.so_is_open_owner = 0; /* It is the openowner seqid that will be incremented in encode in the * case of new lockowners; so increment the lock seqid manually: */ - sop->so_seqid = lock->lk_new_lock_seqid + 1; - sop->so_confirmed = 1; - rp = &sop->so_replay; - rp->rp_status = nfserr_serverfault; - rp->rp_buflen = 0; - rp->rp_buf = rp->rp_ibuf; - return sop; + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; + hash_lockowner(lo, strhashval, clp, open_stp); + return lo; } -static struct nfs4_stateid * -alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) +static struct nfs4_ol_stateid * +alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) { - struct nfs4_stateid *stp; - unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); + struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = lo->lo_owner.so_client; - stp = nfs4_alloc_stateid(); + stp = nfs4_alloc_stateid(clp); if (stp == NULL) - goto out; - INIT_LIST_HEAD(&stp->st_hash); - INIT_LIST_HEAD(&stp->st_perfile); - INIT_LIST_HEAD(&stp->st_perstateowner); - INIT_LIST_HEAD(&stp->st_lockowners); /* not used */ - list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); + return NULL; + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); - list_add(&stp->st_perstateowner, &sop->so_stateids); - stp->st_stateowner = sop; + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); + stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; - stp->st_stateid.si_stateownerid = sop->so_id; - stp->st_stateid.si_fileid = fp->fi_id; - stp->st_stateid.si_generation = 0; - stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ - stp->st_access_bmap = open_stp->st_access_bmap; + stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - -out: return stp; } static int check_lock_length(u64 offset, u64 length) { - return ((length == 0) || ((length != ~(u64)0) && + return ((length == 0) || ((length != NFS4_MAX_UINT64) && LOFF_OVERFLOW(offset, length))); } +static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) +{ + struct nfs4_file *fp = lock_stp->st_file; + int oflag = nfs4_access_to_omode(access); + + if (test_access(access, lock_stp)) + return; + nfs4_file_get_access(fp, oflag); + set_access(access, lock_stp); +} + +static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +{ + struct nfs4_file *fi = ost->st_file; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *cl = oo->oo_owner.so_client; + struct nfs4_lockowner *lo; + unsigned int strhashval; + struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); + + lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, + &lock->v.new.owner, nn); + if (lo) { + if (!cstate->minorversion) + return nfserr_bad_seqid; + /* XXX: a lockowner always has exactly one stateid: */ + *lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return nfs_ok; + } + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner(lo); + return nfserr_jukebox; + } + *new = true; + return nfs_ok; +} + /* * LOCK operation */ @@ -2579,16 +4399,18 @@ __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock) { - struct nfs4_stateowner *open_sop = NULL; - struct nfs4_stateowner *lock_sop = NULL; - struct nfs4_stateid *lock_stp; - struct file *filp; - struct file_lock file_lock; - struct file_lock conflock; + struct nfs4_openowner *open_sop = NULL; + struct nfs4_lockowner *lock_sop = NULL; + struct nfs4_ol_stateid *lock_stp; + struct file *filp = NULL; + struct file_lock *file_lock = NULL; + struct file_lock *conflock = NULL; __be32 status = 0; - unsigned int strhashval; - unsigned int cmd; + bool new_state = false; + int lkflg; int err; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", (long long) lock->lk_offset, @@ -2598,7 +4420,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if ((status = fh_verify(rqstp, &cstate->current_fh, - S_IFREG, MAY_LOCK))) { + S_IFREG, NFSD_MAY_LOCK))) { dprintk("NFSD: nfsd4_lock: permission denied!\n"); return status; } @@ -2606,136 +4428,152 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); if (lock->lk_is_new) { - /* - * Client indicates that this is a new lockowner. - * Use open owner and open stateid to create lock owner and - * lock stateid. - */ - struct nfs4_stateid *open_stp = NULL; - struct nfs4_file *fp; - + struct nfs4_ol_stateid *open_stp = NULL; + + if (nfsd4_has_session(cstate)) + /* See rfc 5661 18.10.3: given clientid is ignored: */ + memcpy(&lock->v.new.clientid, + &cstate->session->se_client->cl_clientid, + sizeof(clientid_t)); + status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lock->lk_new_clientid)) + if (STALE_CLIENTID(&lock->lk_new_clientid, nn)) goto out; /* validate and update open stateid and open seqid */ - status = nfs4_preprocess_seqid_op(&cstate->current_fh, + status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - CHECK_FH | OPEN_STATE, - &lock->lk_replay_owner, &open_stp, - lock); + &open_stp, nn); if (status) goto out; - open_sop = lock->lk_replay_owner; - /* create lockowner and lock stateid */ - fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->so_client->cl_clientid.cl_id, - &lock->v.new.owner); - /* XXX: Do we need to check for duplicate stateowners on - * the same file, or should they just be allowed (and - * create new stateids)? */ - status = nfserr_resource; - lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->so_client, open_stp, lock); - if (lock_sop == NULL) - goto out; - lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); - if (lock_stp == NULL) - goto out; - } else { - /* lock (lock owner + lock stateid) already exists */ - status = nfs4_preprocess_seqid_op(&cstate->current_fh, - lock->lk_old_lock_seqid, - &lock->lk_old_lock_stateid, - CHECK_FH | LOCK_STATE, - &lock->lk_replay_owner, &lock_stp, lock); - if (status) + open_sop = openowner(open_stp->st_stateowner); + status = nfserr_bad_stateid; + if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, + &lock->v.new.clientid)) goto out; - lock_sop = lock->lk_replay_owner; - } - /* lock->lk_replay_owner and lock_stp have been created or found */ - filp = lock_stp->st_vfs_file; + status = lookup_or_create_lock_state(cstate, open_stp, lock, + &lock_stp, &new_state); + } else + status = nfs4_preprocess_seqid_op(cstate, + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + NFS4_LOCK_STID, &lock_stp, nn); + if (status) + goto out; + lock_sop = lockowner(lock_stp->st_stateowner); + + lkflg = setlkflg(lock->lk_type); + status = nfs4_check_openmode(lock_stp, lkflg); + if (status) + goto out; status = nfserr_grace; - if (nfs4_in_grace() && !lock->lk_reclaim) + if (locks_in_grace(net) && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && lock->lk_reclaim) + if (!locks_in_grace(net) && lock->lk_reclaim) + goto out; + + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; goto out; + } - locks_init_lock(&file_lock); + locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: - file_lock.fl_type = F_RDLCK; - cmd = F_SETLK; - break; + filp = find_readable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); + file_lock->fl_type = F_RDLCK; + break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - file_lock.fl_type = F_WRLCK; - cmd = F_SETLK; - break; + filp = find_writeable_file(lock_stp->st_file); + if (filp) + get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); + file_lock->fl_type = F_WRLCK; + break; default: status = nfserr_inval; goto out; } - file_lock.fl_owner = (fl_owner_t)lock_sop; - file_lock.fl_pid = current->tgid; - file_lock.fl_file = filp; - file_lock.fl_flags = FL_POSIX; - file_lock.fl_lmops = &nfsd_posix_mng_ops; - - file_lock.fl_start = lock->lk_offset; - if ((lock->lk_length == ~(u64)0) || - LOFF_OVERFLOW(lock->lk_offset, lock->lk_length)) - file_lock.fl_end = ~(u64)0; - else - file_lock.fl_end = lock->lk_offset + lock->lk_length - 1; - nfs4_transform_lock_offset(&file_lock); - - /* - * Try to lock the file in the VFS. - * Note: locks.c uses the BKL to protect the inode's lock list. - */ + if (!filp) { + status = nfserr_openmode; + goto out; + } + file_lock->fl_owner = (fl_owner_t)lock_sop; + file_lock->fl_pid = current->tgid; + file_lock->fl_file = filp; + file_lock->fl_flags = FL_POSIX; + file_lock->fl_lmops = &nfsd_posix_mng_ops; + file_lock->fl_start = lock->lk_offset; + file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length); + nfs4_transform_lock_offset(file_lock); + + conflock = locks_alloc_lock(); + if (!conflock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } - /* XXX?: Just to divert the locks_release_private at the start of - * locks_copy_lock: */ - locks_init_lock(&conflock); - err = vfs_lock_file(filp, cmd, &file_lock, &conflock); + err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, + update_stateid(&lock_stp->st_stid.sc_stateid); + memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, sizeof(stateid_t)); status = 0; break; case (EAGAIN): /* conflock holds conflicting lock */ status = nfserr_denied; dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); - nfs4_set_lock_denied(&conflock, &lock->lk_denied); + nfs4_set_lock_denied(conflock, &lock->lk_denied); break; case (EDEADLK): status = nfserr_deadlock; break; - default: + default: dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err); - status = nfserr_resource; + status = nfserrno(err); break; } out: - if (status && lock->lk_is_new && lock_sop) - release_stateowner(lock_sop); - if (lock->lk_replay_owner) { - nfs4_get_stateowner(lock->lk_replay_owner); - cstate->replay_owner = lock->lk_replay_owner; - } - nfs4_unlock_state(); + if (status && new_state) + release_lockowner(lock_sop); + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) + nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); + if (conflock) + locks_free_lock(conflock); return status; } /* + * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, + * so we do a temporary open here just to get an open file to pass to + * vfs_test_lock. (Arguably perhaps test_lock should be done with an + * inode operation.) + */ +static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) +{ + struct file *file; + __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + if (!err) { + err = nfserrno(vfs_test_lock(file, lock)); + nfsd_close(file); + } + return err; +} + +/* * LOCKT operation */ __be32 @@ -2743,41 +4581,44 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { struct inode *inode; - struct file file; - struct file_lock file_lock; - int error; + struct file_lock *file_lock = NULL; + struct nfs4_lockowner *lo; __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if (nfs4_in_grace()) + if (locks_in_grace(SVC_NET(rqstp))) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; - lockt->lt_stateowner = NULL; nfs4_lock_state(); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lockt->lt_clientid)) - goto out; + if (!nfsd4_has_session(cstate)) { + status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + if (status) + goto out; + } - if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { - dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); - if (status == nfserr_symlink) - status = nfserr_inval; + if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; - } inode = cstate->current_fh.fh_dentry->d_inode; - locks_init_lock(&file_lock); + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } + locks_init_lock(file_lock); switch (lockt->lt_type) { case NFS4_READ_LT: case NFS4_READW_LT: - file_lock.fl_type = F_RDLCK; + file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - file_lock.fl_type = F_WRLCK; + file_lock->fl_type = F_WRLCK; break; default: dprintk("NFSD: nfs4_lockt: bad lock type!\n"); @@ -2785,42 +4626,29 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lockt->lt_stateowner = find_lockstateowner_str(inode, - &lockt->lt_clientid, &lockt->lt_owner); - if (lockt->lt_stateowner) - file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; - file_lock.fl_pid = current->tgid; - file_lock.fl_flags = FL_POSIX; - file_lock.fl_lmops = &nfsd_posix_mng_ops; + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); + if (lo) + file_lock->fl_owner = (fl_owner_t)lo; + file_lock->fl_pid = current->tgid; + file_lock->fl_flags = FL_POSIX; - file_lock.fl_start = lockt->lt_offset; - if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) - file_lock.fl_end = ~(u64)0; - else - file_lock.fl_end = lockt->lt_offset + lockt->lt_length - 1; + file_lock->fl_start = lockt->lt_offset; + file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length); - nfs4_transform_lock_offset(&file_lock); + nfs4_transform_lock_offset(file_lock); - /* vfs_test_lock uses the struct file _only_ to resolve the inode. - * since LOCKT doesn't require an OPEN, and therefore a struct - * file may not exist, pass vfs_test_lock a struct file with - * only the dentry:inode set. - */ - memset(&file, 0, sizeof (struct file)); - file.f_path.dentry = cstate->current_fh.fh_dentry; - - status = nfs_ok; - error = vfs_test_lock(&file, &file_lock); - if (error) { - status = nfserrno(error); + status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock); + if (status) goto out; - } - if (file_lock.fl_type != F_UNLCK) { + + if (file_lock->fl_type != F_UNLCK) { status = nfserr_denied; - nfs4_set_lock_denied(&file_lock, &lockt->lt_denied); + nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); return status; } @@ -2828,12 +4656,13 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; struct file *filp = NULL; - struct file_lock file_lock; + struct file_lock *file_lock = NULL; __be32 status; int err; - + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, (long long) locku->lu_length); @@ -2843,50 +4672,49 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, - locku->lu_seqid, - &locku->lu_stateid, - CHECK_FH | LOCK_STATE, - &locku->lu_stateowner, &stp, NULL))) + status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, + &locku->lu_stateid, NFS4_LOCK_STID, + &stp, nn); + if (status) goto out; - - filp = stp->st_vfs_file; - BUG_ON(!filp); - locks_init_lock(&file_lock); - file_lock.fl_type = F_UNLCK; - file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner; - file_lock.fl_pid = current->tgid; - file_lock.fl_file = filp; - file_lock.fl_flags = FL_POSIX; - file_lock.fl_lmops = &nfsd_posix_mng_ops; - file_lock.fl_start = locku->lu_offset; - - if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) - file_lock.fl_end = ~(u64)0; - else - file_lock.fl_end = locku->lu_offset + locku->lu_length - 1; - nfs4_transform_lock_offset(&file_lock); - - /* - * Try to unlock the file in the VFS. - */ - err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL); + filp = find_any_file(stp->st_file); + if (!filp) { + status = nfserr_lock_range; + goto out; + } + file_lock = locks_alloc_lock(); + if (!file_lock) { + dprintk("NFSD: %s: unable to allocate lock!\n", __func__); + status = nfserr_jukebox; + goto out; + } + locks_init_lock(file_lock); + file_lock->fl_type = F_UNLCK; + file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); + file_lock->fl_pid = current->tgid; + file_lock->fl_file = filp; + file_lock->fl_flags = FL_POSIX; + file_lock->fl_lmops = &nfsd_posix_mng_ops; + file_lock->fl_start = locku->lu_offset; + + file_lock->fl_end = last_byte_offset(locku->lu_offset, + locku->lu_length); + nfs4_transform_lock_offset(file_lock); + + err = vfs_lock_file(filp, F_SETLK, file_lock, NULL); if (err) { dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n"); goto out_nfserr; } - /* - * OK, unlock succeeded; the only thing left to do is update the stateid. - */ - update_stateid(&stp->st_stateid); - memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); out: - if (locku->lu_stateowner) { - nfs4_get_stateowner(locku->lu_stateowner); - cstate->replay_owner = locku->lu_stateowner; - } - nfs4_unlock_state(); + nfsd4_bump_seqid(cstate, status); + if (!cstate->replay_owner) + nfs4_unlock_state(); + if (file_lock) + locks_free_lock(file_lock); return status; out_nfserr: @@ -2900,13 +4728,13 @@ out_nfserr: * 0: no locks held by lockowner */ static int -check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) +check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) { struct file_lock **flpp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->fi_inode; int status = 0; - lock_kernel(); + spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { status = 1; @@ -2914,7 +4742,7 @@ check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) } } out: - unlock_kernel(); + spin_unlock(&inode->i_lock); return status; } @@ -2925,41 +4753,37 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, { clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_stateowner *sop; - struct nfs4_stateid *stp; + struct nfs4_lockowner *lo; + struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; - int i; + unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - /* XXX check for lease expiration */ - - status = nfserr_stale_clientid; - if (STALE_CLIENTID(clid)) - return status; - nfs4_lock_state(); + status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + if (status) + goto out; + status = nfserr_locks_held; - /* XXX: we're doing a linear search through all the lockowners. - * Yipes! For now we'll just hope clients aren't really using - * release_lockowner much, but eventually we have to fix these - * data structures. */ INIT_LIST_HEAD(&matches); - for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - if (check_for_locks(stp->st_vfs_file, sop)) - goto out; - /* Note: so_perclient unused for lockowners, - * so it's OK to fool with here. */ - list_add(&sop->so_perclient, &matches); - } + + list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { + if (sop->so_is_open_owner) + continue; + if (!same_owner_str(sop, owner, clid)) + continue; + list_for_each_entry(stp, &sop->so_stateids, + st_perstateowner) { + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) + goto out; + list_add(&lo->lo_list, &matches); } } /* Clients probably won't expect us to return with some (but not all) @@ -2967,12 +4791,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, * have been checked. */ status = nfs_ok; while (!list_empty(&matches)) { - sop = list_entry(matches.next, struct nfs4_stateowner, - so_perclient); + lo = list_entry(matches.next, struct nfs4_lockowner, + lo_list); /* unhash_stateowner deletes so_perclient only * for openowners. */ - list_del(&sop->so_perclient); - release_stateowner(sop); + list_del(&lo->lo_list); + release_lockowner(lo); } out: nfs4_unlock_state(); @@ -2985,78 +4809,74 @@ alloc_reclaim(void) return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); } -int -nfs4_has_reclaimed_state(const char *name) +bool +nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn) { - unsigned int strhashval = clientstr_hashval(name); - struct nfs4_client *clp; + struct nfs4_client_reclaim *crp; - clp = find_confirmed_client_by_str(name, strhashval); - return clp ? 1 : 0; + crp = nfsd4_find_reclaim_client(name, nn); + return (crp && crp->cr_clp); } /* * failure => all reset bets are off, nfserr_no_grace... */ -int -nfs4_client_to_reclaim(const char *name) +struct nfs4_client_reclaim * +nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client_reclaim *crp = NULL; + struct nfs4_client_reclaim *crp; dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name); crp = alloc_reclaim(); - if (!crp) - return 0; - strhashval = clientstr_hashval(name); - INIT_LIST_HEAD(&crp->cr_strhash); - list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); - memcpy(crp->cr_recdir, name, HEXDIR_LEN); - reclaim_str_hashtbl_size++; - return 1; + if (crp) { + strhashval = clientstr_hashval(name); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_recdir, name, HEXDIR_LEN); + crp->cr_clp = NULL; + nn->reclaim_str_hashtbl_size++; + } + return crp; } -static void -nfs4_release_reclaim(void) +void +nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn) +{ + list_del(&crp->cr_strhash); + kfree(crp); + nn->reclaim_str_hashtbl_size--; +} + +void +nfs4_release_reclaim(struct nfsd_net *nn) { struct nfs4_client_reclaim *crp = NULL; int i; for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&reclaim_str_hashtbl[i])) { - crp = list_entry(reclaim_str_hashtbl[i].next, + while (!list_empty(&nn->reclaim_str_hashtbl[i])) { + crp = list_entry(nn->reclaim_str_hashtbl[i].next, struct nfs4_client_reclaim, cr_strhash); - list_del(&crp->cr_strhash); - kfree(crp); - reclaim_str_hashtbl_size--; + nfs4_remove_reclaim_record(crp, nn); } } - BUG_ON(reclaim_str_hashtbl_size); + WARN_ON_ONCE(nn->reclaim_str_hashtbl_size); } /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ -static struct nfs4_client_reclaim * -nfs4_find_reclaim_client(clientid_t *clid) +struct nfs4_client_reclaim * +nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) { unsigned int strhashval; - struct nfs4_client *clp; struct nfs4_client_reclaim *crp = NULL; + dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir); - /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid); - if (clp == NULL) - return NULL; - - dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", - clp->cl_name.len, clp->cl_name.data, - clp->cl_recdir); - - /* find clp->cl_name in reclaim_str_hashtbl */ - strhashval = clientstr_hashval(clp->cl_recdir); - list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { - if (same_name(crp->cr_recdir, clp->cl_recdir)) { + strhashval = clientstr_hashval(recdir); + list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { + if (same_name(crp->cr_recdir, recdir)) { return crp; } } @@ -3067,71 +4887,198 @@ nfs4_find_reclaim_client(clientid_t *clid) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid) +nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) { - return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; + struct nfs4_client *clp; + + /* find clientid in conf_id_hashtbl */ + clp = find_confirmed_client(clid, sessions, nn); + if (clp == NULL) + return nfserr_reclaim_bad; + + return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; } -/* initialization to perform at module load time: */ +#ifdef CONFIG_NFSD_FAULT_INJECTION -int -nfs4_state_init(void) +u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { - int i, status; + if (mark_client_expired(clp)) + return 0; + expire_client(clp); + return 1; +} - status = nfsd4_init_slabs(); - if (status) - return status; - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - INIT_LIST_HEAD(&conf_id_hashtbl[i]); - INIT_LIST_HEAD(&conf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_str_hashtbl[i]); - INIT_LIST_HEAD(&unconf_id_hashtbl[i]); - } - for (i = 0; i < FILE_HASH_SIZE; i++) { - INIT_LIST_HEAD(&file_hashtbl[i]); - } - for (i = 0; i < OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&ownerstr_hashtbl[i]); - INIT_LIST_HEAD(&ownerid_hashtbl[i]); - } - for (i = 0; i < STATEID_HASH_SIZE; i++) { - INIT_LIST_HEAD(&stateid_hashtbl[i]); - INIT_LIST_HEAD(&lockstateid_hashtbl[i]); - } - for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); - INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); - } - memset(&onestateid, ~0, sizeof(stateid_t)); - INIT_LIST_HEAD(&close_lru); - INIT_LIST_HEAD(&client_lru); - INIT_LIST_HEAD(&del_recall_lru); - for (i = 0; i < CLIENT_HASH_SIZE; i++) - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); - reclaim_str_hashtbl_size = 0; - return 0; +u64 nfsd_print_client(struct nfs4_client *clp, u64 num) +{ + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s\n", buf); + return 1; } -static void -nfsd4_load_reboot_recovery_data(void) +static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, + const char *type) { - int status; + char buf[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); +} - nfs4_lock_state(); - nfsd4_init_recdir(user_recovery_dirname); - status = nfsd4_recdir_load(); - nfs4_unlock_state(); - if (status) - printk("NFSD: Failure reading reboot recovery data\n"); +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) +{ + struct nfs4_openowner *oop; + struct nfs4_lockowner *lop, *lo_next; + struct nfs4_ol_stateid *stp, *st_next; + u64 count = 0; + + list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { + list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { + if (func) + func(lop); + if (++count == max) + return count; + } + } + } + + return count; +} + +u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) +{ + return nfsd_foreach_client_lock(clp, max, release_lockowner); +} + +u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_lock(clp, max, NULL); + nfsd_print_count(clp, count, "locked files"); + return count; +} + +static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) +{ + struct nfs4_openowner *oop, *next; + u64 count = 0; + + list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { + if (func) + func(oop); + if (++count == max) + break; + } + + return count; } -unsigned long -get_nfs4_grace_period(void) +u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) { - return max(user_lease_time, lease_time) * HZ; + return nfsd_foreach_client_open(clp, max, release_openowner); +} + +u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +{ + u64 count = nfsd_foreach_client_open(clp, max, NULL); + nfsd_print_count(clp, count, "open files"); + return count; +} + +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) +{ + struct nfs4_delegation *dp, *next; + u64 count = 0; + + lockdep_assert_held(&state_lock); + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) + list_move(&dp->dl_recall_lru, victims); + if (++count == max) + break; + } + return count; } +u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) +{ + struct nfs4_delegation *dp, *next; + LIST_HEAD(victims); + u64 count; + + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, &victims); + spin_unlock(&state_lock); + + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + revoke_delegation(dp); + + return count; +} + +u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) +{ + struct nfs4_delegation *dp, *next; + LIST_HEAD(victims); + u64 count; + + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, &victims); + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + nfsd_break_one_deleg(dp); + spin_unlock(&state_lock); + + return count; +} + +u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +{ + u64 count = 0; + + spin_lock(&state_lock); + count = nfsd_find_all_delegations(clp, max, NULL); + spin_unlock(&state_lock); + + nfsd_print_count(clp, count, "delegations"); + return count; +} + +u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) +{ + struct nfs4_client *clp, *next; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += func(clp, max - count); + if ((max != 0) && (count >= max)) + break; + } + + return count; +} + +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +{ + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; +} + +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -3153,135 +5100,275 @@ set_max_delegations(void) max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT); } -/* initialization to perform when the nfsd service is started: */ - -static void -__nfs4_state_start(void) +static int nfs4_state_create_net(struct net *net) { - unsigned long grace_time; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int i; - boot_time = get_seconds(); - grace_time = get_nfs_grace_period(); - lease_time = user_lease_time; - in_grace = 1; - printk(KERN_INFO "NFSD: starting %ld-second grace period\n", - grace_time/HZ); - laundry_wq = create_singlethread_workqueue("nfsd4"); - queue_delayed_work(laundry_wq, &laundromat_work, grace_time); - set_max_delegations(); + nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->conf_id_hashtbl) + goto err; + nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) * + CLIENT_HASH_SIZE, GFP_KERNEL); + if (!nn->unconf_id_hashtbl) + goto err_unconf_id; + nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!nn->ownerstr_hashtbl) + goto err_ownerstr; + nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * + LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); + if (!nn->lockowner_ino_hashtbl) + goto err_lockowner_ino; + nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * + SESSION_HASH_SIZE, GFP_KERNEL); + if (!nn->sessionid_hashtbl) + goto err_sessionid; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); + INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); + for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); + for (i = 0; i < SESSION_HASH_SIZE; i++) + INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); + nn->conf_name_tree = RB_ROOT; + nn->unconf_name_tree = RB_ROOT; + INIT_LIST_HEAD(&nn->client_lru); + INIT_LIST_HEAD(&nn->close_lru); + INIT_LIST_HEAD(&nn->del_recall_lru); + spin_lock_init(&nn->client_lock); + + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + get_net(net); + + return 0; + +err_sessionid: + kfree(nn->lockowner_ino_hashtbl); +err_lockowner_ino: + kfree(nn->ownerstr_hashtbl); +err_ownerstr: + kfree(nn->unconf_id_hashtbl); +err_unconf_id: + kfree(nn->conf_id_hashtbl); +err: + return -ENOMEM; } -void -nfs4_state_start(void) +static void +nfs4_state_destroy_net(struct net *net) { - if (nfs4_init) - return; - nfsd4_load_reboot_recovery_data(); - __nfs4_state_start(); - nfs4_init = 1; - return; + int i; + struct nfs4_client *clp = NULL; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->conf_id_hashtbl[i])) { + clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->unconf_id_hashtbl[i])) { + clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } + } + + kfree(nn->sessionid_hashtbl); + kfree(nn->lockowner_ino_hashtbl); + kfree(nn->ownerstr_hashtbl); + kfree(nn->unconf_id_hashtbl); + kfree(nn->conf_id_hashtbl); + put_net(net); } int -nfs4_in_grace(void) -{ - return in_grace; +nfs4_state_start_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + ret = nfs4_state_create_net(net); + if (ret) + return ret; + nfsd4_client_tracking_init(net); + nn->boot_time = get_seconds(); + locks_start_grace(net, &nn->nfsd4_manager); + nn->grace_ended = false; + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", + nn->nfsd4_grace, net); + queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); + return 0; } -time_t -nfs4_lease_time(void) +/* initialization to perform when the nfsd service is started: */ + +int +nfs4_state_start(void) { - return lease_time; + int ret; + + ret = set_callback_cred(); + if (ret) + return -ENOMEM; + laundry_wq = create_singlethread_workqueue("nfsd4"); + if (laundry_wq == NULL) { + ret = -ENOMEM; + goto out_recovery; + } + ret = nfsd4_create_callback_queue(); + if (ret) + goto out_free_laundry; + + set_max_delegations(); + + return 0; + +out_free_laundry: + destroy_workqueue(laundry_wq); +out_recovery: + return ret; } -static void -__nfs4_state_shutdown(void) +void +nfs4_state_shutdown_net(struct net *net) { - int i; - struct nfs4_client *clp = NULL; struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - for (i = 0; i < CLIENT_HASH_SIZE; i++) { - while (!list_empty(&conf_id_hashtbl[i])) { - clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); - expire_client(clp); - } - while (!list_empty(&unconf_str_hashtbl[i])) { - clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); - expire_client(clp); - } - } + cancel_delayed_work_sync(&nn->laundromat_work); + locks_end_grace(&nn->nfsd4_manager); + + nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); - spin_lock(&recall_lock); - list_for_each_safe(pos, next, &del_recall_lru) { + spin_lock(&state_lock); + list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_move(&dp->dl_recall_lru, &reaplist); } - spin_unlock(&recall_lock); + spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_del_init(&dp->dl_recall_lru); - unhash_delegation(dp); + destroy_delegation(dp); } - nfsd4_shutdown_recdir(); - nfs4_init = 0; + nfsd4_client_tracking_exit(net); + nfs4_state_destroy_net(net); + nfs4_unlock_state(); } void nfs4_state_shutdown(void) { - cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); destroy_workqueue(laundry_wq); - nfs4_lock_state(); - nfs4_release_reclaim(); - __nfs4_state_shutdown(); - nfs4_unlock_state(); + nfsd4_destroy_callback_queue(); } static void -nfs4_set_recdir(char *recdir) +get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { - nfs4_lock_state(); - strcpy(user_recovery_dirname, recdir); - nfs4_unlock_state(); + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); +} + +static void +put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) +{ + if (cstate->minorversion) { + memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } +} + +void +clear_current_stateid(struct nfsd4_compound_state *cstate) +{ + CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); } /* - * Change the NFSv4 recovery directory to recdir. + * functions to set current state id */ -int -nfs4_reset_recoverydir(char *recdir) +void +nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) { - int status; - struct nameidata nd; + put_stateid(cstate, &odp->od_stateid); +} - status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); - if (status) - return status; - status = -ENOTDIR; - if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { - nfs4_set_recdir(recdir); - status = 0; - } - path_put(&nd.path); - return status; +void +nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + put_stateid(cstate, &open->op_stateid); +} + +void +nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + put_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock) +{ + put_stateid(cstate, &lock->lk_resp_stateid); } /* - * Called when leasetime is changed. - * - * The only way the protocol gives us to handle on-the-fly lease changes is to - * simulate a reboot. Instead of doing that, we just wait till the next time - * we start to register any changes in lease time. If the administrator - * really wants to change the lease time *now*, they can go ahead and bring - * nfsd down and then back up again after changing the lease time. + * functions to consume current state id */ + +void +nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) +{ + get_stateid(cstate, &odp->od_stateid); +} + +void +nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp) +{ + get_stateid(cstate, &drp->dr_stateid); +} + +void +nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp) +{ + get_stateid(cstate, &fsp->fr_stateid); +} + +void +nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) +{ + get_stateid(cstate, &setattr->sa_stateid); +} + +void +nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + get_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) +{ + get_stateid(cstate, &locku->lu_stateid); +} + +void +nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read) +{ + get_stateid(cstate, &read->rd_stateid); +} + void -nfs4_reset_lease(time_t leasetime) +nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write) { - lock_kernel(); - user_lease_time = leasetime; - unlock_kernel(); + get_stateid(cstate, &write->wr_stateid); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0e6a179ecca..944275c8f56 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1,6 +1,4 @@ /* - * fs/nfs/nfs4xdr.c - * * Server-side XDR for NFSv4 * * Copyright (c) 2002 The Regents of the University of Michigan. @@ -42,23 +40,26 @@ * at the end of nfs4svc_decode_compoundargs. */ -#include <linux/param.h> -#include <linux/smp.h> -#include <linux/fs.h> +#include <linux/slab.h> #include <linux/namei.h> -#include <linux/vfs.h> -#include <linux/sunrpc/xdr.h> -#include <linux/sunrpc/svc.h> -#include <linux/sunrpc/clnt.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/state.h> -#include <linux/nfsd/xdr4.h> -#include <linux/nfsd_idmap.h> -#include <linux/nfs4.h> -#include <linux/nfs4_acl.h> -#include <linux/sunrpc/gss_api.h> +#include <linux/statfs.h> +#include <linux/utsname.h> +#include <linux/pagemap.h> #include <linux/sunrpc/svcauth_gss.h> +#include "idmap.h" +#include "acl.h" +#include "xdr4.h" +#include "vfs.h" +#include "state.h" +#include "cache.h" +#include "netns.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> +#endif + + #define NFSDDBG_FACILITY NFSDDBG_XDR /* @@ -70,30 +71,20 @@ #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL static __be32 -check_filename(char *str, int len, __be32 err) +check_filename(char *str, int len) { int i; if (len == 0) return nfserr_inval; if (isdotent(str, len)) - return err; + return nfserr_badname; for (i = 0; i < len; i++) if (str[i] == '/') - return err; + return nfserr_badname; return 0; } -/* - * START OF "GENERIC" DECODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ #define DECODE_HEAD \ __be32 *p; \ __be32 status @@ -107,16 +98,6 @@ xdr_error: \ status = nfserr_bad_xdr; \ goto out -#define READ32(x) (x) = ntohl(*p++) -#define READ64(x) do { \ - (x) = (u64)ntohl(*p++) << 32; \ - (x) |= ntohl(*p++); \ -} while (0) -#define READTIME(x) do { \ - p++; \ - (x) = ntohl(*p++); \ - p++; \ -} while (0) #define READMEM(x,nbytes) do { \ x = (char *)p; \ p += XDR_QUADLEN(nbytes); \ @@ -148,6 +129,19 @@ xdr_error: \ } \ } while (0) +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -175,21 +169,26 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. */ memcpy(p, argp->p, avail); - /* step to next page */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; } +static int zero_clientid(clientid_t *clid) +{ + return (clid->cl_boot == 0) && (clid->cl_id == 0); +} + +/** + * defer_free - mark an allocation as deferred freed + * @argp: NFSv4 compound argument structure to be freed with + * @release: release callback to free @p, typically kfree() + * @p: pointer to be freed + * + * Marks @p to be freed when processing the compound operation + * described in @argp finishes. + */ static int defer_free(struct nfsd4_compoundargs *argp, void (*release)(const void *), void *p) @@ -206,13 +205,22 @@ defer_free(struct nfsd4_compoundargs *argp, return 0; } +/** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with + * @p: pointer to be duplicated + * @nbytes: length to be duplicated + * + * Returns a pointer to a copy of @nbytes bytes of memory at @p + * that are preserved until processing of the NFSv4 compound + * operation described by @argp finishes. + */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { if (p == argp->tmp) { - p = kmalloc(nbytes, GFP_KERNEL); + p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); if (!p) return NULL; - memcpy(p, argp->tmp, nbytes); } else { BUG_ON(p != argp->tmpp); argp->tmpp = NULL; @@ -232,139 +240,131 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) bmval[0] = 0; bmval[1] = 0; + bmval[2] = 0; READ_BUF(4); - READ32(bmlen); + bmlen = be32_to_cpup(p++); if (bmlen > 1000) goto xdr_error; READ_BUF(bmlen << 2); if (bmlen > 0) - READ32(bmval[0]); + bmval[0] = be32_to_cpup(p++); if (bmlen > 1) - READ32(bmval[1]); + bmval[1] = be32_to_cpup(p++); + if (bmlen > 2) + bmval[2] = be32_to_cpup(p++); DECODE_TAIL; } static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, - struct nfs4_acl **acl) +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; + u64 sec; char *buf; - int host_err; DECODE_HEAD; iattr->ia_valid = 0; if ((status = nfsd4_decode_bitmap(argp, bmval))) return status; - /* - * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; - * read-only attributes return ERR_INVAL. - */ - if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) - return nfserr_attrnotsupp; - if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) - return nfserr_inval; - READ_BUF(4); - READ32(expected_len); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { READ_BUF(8); len += 8; - READ64(iattr->ia_size); + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; - READ32(nace); + nace = be32_to_cpup(p++); if (nace > NFS4_ACL_MAX) - return nfserr_resource; + return nfserr_fbig; *acl = nfs4_acl_new(nace); - if (*acl == NULL) { - host_err = -ENOMEM; - goto out_nfserr; - } + if (*acl == NULL) + return nfserr_jukebox; + defer_free(argp, kfree, *acl); (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; - READ32(ace->type); - READ32(ace->flag); - READ32(ace->access_mask); - READ32(dummy32); + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += XDR_QUADLEN(dummy32) << 2; READMEM(buf, dummy32); ace->whotype = nfs4_acl_get_whotype(buf, dummy32); - host_err = 0; + status = nfs_ok; if (ace->whotype != NFS4_ACL_WHO_NAMED) - ace->who = 0; + ; else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - host_err = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace->who); + status = nfsd_map_name_to_gid(argp->rqstp, + buf, dummy32, &ace->who_gid); else - host_err = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace->who); - if (host_err) - goto out_nfserr; + status = nfsd_map_name_to_uid(argp->rqstp, + buf, dummy32, &ace->who_uid); + if (status) + return status; } } else *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { READ_BUF(4); len += 4; - READ32(iattr->ia_mode); + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) - goto out_nfserr; + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + return status; iattr->ia_valid |= ATTR_UID; } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) - goto out_nfserr; + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + return status; iattr->ia_valid |= ATTR_GID; } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); - READ32(iattr->ia_atime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_atime.tv_sec = (time_t)sec; + iattr->ia_atime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -376,35 +376,19 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia goto xdr_error; } } - if (bmval[1] & FATTR4_WORD1_TIME_METADATA) { - /* We require the high 32 bits of 'seconds' to be 0, and we ignore - all 32 bits of 'nseconds'. */ - READ_BUF(12); - len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_ctime.tv_sec); - READ32(iattr->ia_ctime.tv_nsec); - if (iattr->ia_ctime.tv_nsec >= (u32)1000000000) - return nfserr_inval; - iattr->ia_valid |= ATTR_CTIME; - } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; - READ32(dummy32); + dummy32 = be32_to_cpup(p++); switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: /* We require the high 32 bits of 'seconds' to be 0, and we ignore all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); - READ32(iattr->ia_mtime.tv_nsec); + p = xdr_decode_hyper(p, &sec); + iattr->ia_mtime.tv_sec = sec; + iattr->ia_mtime.tv_nsec = be32_to_cpup(p++); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -416,14 +400,53 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia goto xdr_error; } } - if (len != expected_len) + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + label->len = dummy32; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 + || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 + || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) + READ_BUF(expected_len - len); + else if (len != expected_len) goto xdr_error; DECODE_TAIL; +} -out_nfserr: - status = nfserrno(host_err); - goto out; +static __be32 +nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + sid->si_generation = be32_to_cpup(p++); + COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; } static __be32 @@ -432,21 +455,122 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_HEAD; READ_BUF(4); - READ32(access->ac_req_access); + access->ac_req_access = be32_to_cpup(p++); DECODE_TAIL; } +static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + DECODE_HEAD; + u32 dummy, uid, gid; + char *machine_name; + int i; + int nr_secflavs; + + /* callback_sec_params4 */ + READ_BUF(4); + nr_secflavs = be32_to_cpup(p++); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; + for (i = 0; i < nr_secflavs; ++i) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + switch (dummy) { + case RPC_AUTH_NULL: + /* Nothing to read */ + if (cbs->flavor == (u32)(-1)) + cbs->flavor = RPC_AUTH_NULL; + break; + case RPC_AUTH_UNIX: + READ_BUF(8); + /* stamp */ + dummy = be32_to_cpup(p++); + + /* machine name */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); + + /* more gids */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + if (cbs->flavor == (u32)(-1)) { + kuid_t kuid = make_kuid(&init_user_ns, uid); + kgid_t kgid = make_kgid(&init_user_ns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid" + "uid or gid ignoring!\n"); + } + } + break; + case RPC_AUTH_GSS: + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + dummy = be32_to_cpup(p++); + /* gcbp_handle_from_server */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + break; + default: + dprintk("Illegal callback secflavor\n"); + return nfserr_inval; + } + } + DECODE_TAIL; +} + +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + DECODE_HEAD; + + READ_BUF(4); + bc->bc_cb_program = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); + COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + bcts->dir = be32_to_cpup(p++); + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. */ + DECODE_TAIL; +} + static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { DECODE_HEAD; - close->cl_stateowner = NULL; - READ_BUF(4 + sizeof(stateid_t)); - READ32(close->cl_seqid); - READ32(close->cl_stateid.si_generation); - COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ_BUF(4); + close->cl_seqid = be32_to_cpup(p++); + return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; } @@ -458,8 +582,8 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_HEAD; READ_BUF(12); - READ64(commit->co_offset); - READ32(commit->co_count); + p = xdr_decode_hyper(p, &commit->co_offset); + commit->co_count = be32_to_cpup(p++); DECODE_TAIL; } @@ -470,19 +594,30 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_HEAD; READ_BUF(4); - READ32(create->cr_type); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: READ_BUF(4); - READ32(create->cr_linklen); + create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: READ_BUF(8); - READ32(create->cr_specdata1); - READ32(create->cr_specdata2); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -492,13 +627,15 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create } READ_BUF(4); - READ32(create->cr_namelen); + create->cr_namelen = be32_to_cpup(p++); READ_BUF(create->cr_namelen); SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; - if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) + status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, + &create->cr_acl, &create->cr_label); + if (status) goto out; DECODE_TAIL; @@ -507,13 +644,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create static inline __be32 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - READ32(dr->dr_stateid.si_generation); - COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); - - DECODE_TAIL; + return nfsd4_decode_stateid(argp, &dr->dr_stateid); } static inline __be32 @@ -528,10 +659,10 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_HEAD; READ_BUF(4); - READ32(link->li_namelen); + link->li_namelen = be32_to_cpup(p++); READ_BUF(link->li_namelen); SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + if ((status = check_filename(link->li_name, link->li_namelen))) return status; DECODE_TAIL; @@ -542,35 +673,36 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { DECODE_HEAD; - lock->lk_replay_owner = NULL; /* * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ READ_BUF(28); - READ32(lock->lk_type); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(lock->lk_reclaim); - READ64(lock->lk_offset); - READ64(lock->lk_length); - READ32(lock->lk_is_new); + lock->lk_reclaim = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); + lock->lk_is_new = be32_to_cpup(p++); if (lock->lk_is_new) { - READ_BUF(36); - READ32(lock->lk_new_open_seqid); - READ32(lock->lk_new_open_stateid.si_generation); - - COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ32(lock->lk_new_lock_seqid); + READ_BUF(4); + lock->lk_new_open_seqid = be32_to_cpup(p++); + status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); + if (status) + return status; + READ_BUF(8 + sizeof(clientid_t)); + lock->lk_new_lock_seqid = be32_to_cpup(p++); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - READ32(lock->lk_new_owner.len); + lock->lk_new_owner.len = be32_to_cpup(p++); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { - READ_BUF(20); - READ32(lock->lk_old_lock_stateid.si_generation); - COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ32(lock->lk_old_lock_seqid); + status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); + if (status) + return status; + READ_BUF(4); + lock->lk_old_lock_seqid = be32_to_cpup(p++); } DECODE_TAIL; @@ -582,13 +714,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_HEAD; READ_BUF(32); - READ32(lockt->lt_type); + lockt->lt_type = be32_to_cpup(p++); if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) goto xdr_error; - READ64(lockt->lt_offset); - READ64(lockt->lt_length); + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); COPYMEM(&lockt->lt_clientid, 8); - READ32(lockt->lt_owner.len); + lockt->lt_owner.len = be32_to_cpup(p++); READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); @@ -600,16 +732,17 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { DECODE_HEAD; - locku->lu_stateowner = NULL; - READ_BUF(24 + sizeof(stateid_t)); - READ32(locku->lu_type); + READ_BUF(8); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; - READ32(locku->lu_seqid); - READ32(locku->lu_stateid.si_generation); - COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ64(locku->lu_offset); - READ64(locku->lu_length); + locku->lu_seqid = be32_to_cpup(p++); + status = nfsd4_decode_stateid(argp, &locku->lu_stateid); + if (status) + return status; + READ_BUF(16); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); DECODE_TAIL; } @@ -620,51 +753,155 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_HEAD; READ_BUF(4); - READ32(lookup->lo_len); + lookup->lo_len = be32_to_cpup(p++); READ_BUF(lookup->lo_len); SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + if ((status = check_filename(lookup->lo_name, lookup->lo_len))) return status; DECODE_TAIL; } +static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) +{ + __be32 *p; + u32 w; + + READ_BUF(4); + w = be32_to_cpup(p++); + *share_access = w & NFS4_SHARE_ACCESS_MASK; + *deleg_want = w & NFS4_SHARE_WANT_MASK; + if (deleg_when) + *deleg_when = w & NFS4_SHARE_WHEN_MASK; + + switch (w & NFS4_SHARE_ACCESS_MASK) { + case NFS4_SHARE_ACCESS_READ: + case NFS4_SHARE_ACCESS_WRITE: + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_ACCESS_MASK; + if (!w) + return nfs_ok; + if (!argp->minorversion) + return nfserr_bad_xdr; + switch (w & NFS4_SHARE_WANT_MASK) { + case NFS4_SHARE_WANT_NO_PREFERENCE: + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + case NFS4_SHARE_WANT_NO_DELEG: + case NFS4_SHARE_WANT_CANCEL: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_WANT_MASK; + if (!w) + return nfs_ok; + + if (!deleg_when) /* open_downgrade */ + return nfserr_inval; + switch (w) { + case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: + case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: + case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL | + NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): + return nfs_ok; + } +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) +{ + __be32 *p; + + READ_BUF(4); + *x = be32_to_cpup(p++); + /* Note: unlinke access bits, deny bits may be zero. */ + if (*x & ~NFS4_SHARE_DENY_BOTH) + return nfserr_bad_xdr; + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + + READ_BUF(4); + o->len = be32_to_cpup(p++); + + if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + + READ_BUF(o->len); + SAVEMEM(o->data, o->len); + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { DECODE_HEAD; + u32 dummy; memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; - open->op_stateowner = NULL; + open->op_openowner = NULL; + open->op_xdr_error = 0; /* seqid, share_access, share_deny, clientid, ownerlen */ - READ_BUF(16 + sizeof(clientid_t)); - READ32(open->op_seqid); - READ32(open->op_share_access); - READ32(open->op_share_deny); + READ_BUF(4); + open->op_seqid = be32_to_cpup(p++); + /* decode, yet ignore deleg_when until supported */ + status = nfsd4_decode_share_access(argp, &open->op_share_access, + &open->op_deleg_want, &dummy); + if (status) + goto xdr_error; + status = nfsd4_decode_share_deny(argp, &open->op_share_deny); + if (status) + goto xdr_error; + READ_BUF(sizeof(clientid_t)); COPYMEM(&open->op_clientid, sizeof(clientid_t)); - READ32(open->op_owner.len); - - /* owner, open_flag */ - READ_BUF(open->op_owner.len + 4); - SAVEMEM(open->op_owner.data, open->op_owner.len); - READ32(open->op_create); + status = nfsd4_decode_opaque(argp, &open->op_owner); + if (status) + goto xdr_error; + READ_BUF(4); + open->op_create = be32_to_cpup(p++); switch (open->op_create) { case NFS4_OPEN_NOCREATE: break; case NFS4_OPEN_CREATE: READ_BUF(4); - READ32(open->op_createmode); + open->op_createmode = be32_to_cpup(p++); switch (open->op_createmode) { case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: - if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl, &open->op_label); + if (status) goto out; break; case NFS4_CREATE_EXCLUSIVE: - READ_BUF(8); - COPYMEM(open->op_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + goto xdr_error; + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl, &open->op_label); + if (status) + goto out; break; default: goto xdr_error; @@ -676,28 +913,43 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* open_claim */ READ_BUF(4); - READ32(open->op_claim_type); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: READ_BUF(4); - READ32(open->op_fname.len); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: READ_BUF(4); - READ32(open->op_delegate_type); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: - READ_BUF(sizeof(stateid_t) + 4); - COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); - READ32(open->op_fname.len); + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) + return status; + READ_BUF(4); + open->op_fname.len = be32_to_cpup(p++); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) + return status; + break; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + if (argp->minorversion < 1) + goto xdr_error; + /* void */ + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + if (argp->minorversion < 1) + goto xdr_error; + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) return status; break; default: @@ -711,13 +963,16 @@ static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; - - open_conf->oc_stateowner = NULL; - READ_BUF(4 + sizeof(stateid_t)); - READ32(open_conf->oc_req_stateid.si_generation); - COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ32(open_conf->oc_seqid); - + + if (argp->minorversion >= 1) + return nfserr_notsupp; + + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); + if (status) + return status; + READ_BUF(4); + open_conf->oc_seqid = be32_to_cpup(p++); + DECODE_TAIL; } @@ -726,14 +981,18 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d { DECODE_HEAD; - open_down->od_stateowner = NULL; - READ_BUF(12 + sizeof(stateid_t)); - READ32(open_down->od_stateid.si_generation); - COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ32(open_down->od_seqid); - READ32(open_down->od_share_access); - READ32(open_down->od_share_deny); - + status = nfsd4_decode_stateid(argp, &open_down->od_stateid); + if (status) + return status; + READ_BUF(4); + open_down->od_seqid = be32_to_cpup(p++); + status = nfsd4_decode_share_access(argp, &open_down->od_share_access, + &open_down->od_deleg_want, NULL); + if (status) + return status; + status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); + if (status) + return status; DECODE_TAIL; } @@ -743,7 +1002,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_HEAD; READ_BUF(4); - READ32(putfh->pf_fhlen); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) goto xdr_error; READ_BUF(putfh->pf_fhlen); @@ -753,15 +1012,24 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) +{ + if (argp->minorversion == 0) + return nfs_ok; + return nfserr_notsupp; +} + +static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; - READ_BUF(sizeof(stateid_t) + 12); - READ32(read->rd_stateid.si_generation); - COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ64(read->rd_offset); - READ32(read->rd_length); + status = nfsd4_decode_stateid(argp, &read->rd_stateid); + if (status) + return status; + READ_BUF(12); + p = xdr_decode_hyper(p, &read->rd_offset); + read->rd_length = be32_to_cpup(p++); DECODE_TAIL; } @@ -772,10 +1040,10 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_HEAD; READ_BUF(24); - READ64(readdir->rd_cookie); + p = xdr_decode_hyper(p, &readdir->rd_cookie); COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ - READ32(readdir->rd_maxcount); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) goto out; @@ -788,10 +1056,10 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_HEAD; READ_BUF(4); - READ32(remove->rm_namelen); + remove->rm_namelen = be32_to_cpup(p++); READ_BUF(remove->rm_namelen); SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + if ((status = check_filename(remove->rm_name, remove->rm_namelen))) return status; DECODE_TAIL; @@ -803,15 +1071,15 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_HEAD; READ_BUF(4); - READ32(rename->rn_snamelen); + rename->rn_snamelen = be32_to_cpup(p++); READ_BUF(rename->rn_snamelen + 4); SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ32(rename->rn_tnamelen); + rename->rn_tnamelen = be32_to_cpup(p++); READ_BUF(rename->rn_tnamelen); SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) return status; DECODE_TAIL; @@ -822,6 +1090,9 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(sizeof(clientid_t)); COPYMEM(clientid, sizeof(clientid_t)); @@ -835,51 +1106,63 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, DECODE_HEAD; READ_BUF(4); - READ32(secinfo->si_namelen); + secinfo->si_namelen = be32_to_cpup(p++); READ_BUF(secinfo->si_namelen); SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen, - nfserr_noent); + status = check_filename(secinfo->si_name, secinfo->si_namelen); if (status) return status; DECODE_TAIL; } static __be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) { DECODE_HEAD; - READ_BUF(sizeof(stateid_t)); - READ32(setattr->sa_stateid.si_generation); - COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); - if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) - goto out; - + READ_BUF(4); + sin->sin_style = be32_to_cpup(p++); DECODE_TAIL; } static __be32 +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +{ + __be32 status; + + status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + if (status) + return status; + return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, + &setattr->sa_acl, &setattr->sa_label); +} + +static __be32 nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) { DECODE_HEAD; - READ_BUF(12); - COPYMEM(setclientid->se_verf.data, 8); - READ32(setclientid->se_namelen); + if (argp->minorversion >= 1) + return nfserr_notsupp; - READ_BUF(setclientid->se_namelen + 8); - SAVEMEM(setclientid->se_name, setclientid->se_namelen); - READ32(setclientid->se_callback_prog); - READ32(setclientid->se_callback_netid_len); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); + + status = nfsd4_decode_opaque(argp, &setclientid->se_name); + if (status) + return nfserr_bad_xdr; + READ_BUF(8); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_netid_len + 4); SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ32(setclientid->se_callback_addr_len); + setclientid->se_callback_addr_len = be32_to_cpup(p++); READ_BUF(setclientid->se_callback_addr_len + 4); SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ32(setclientid->se_callback_ident); + setclientid->se_callback_ident = be32_to_cpup(p++); DECODE_TAIL; } @@ -889,9 +1172,12 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; - READ_BUF(8 + sizeof(nfs4_verifier)); + if (argp->minorversion >= 1) + return nfserr_notsupp; + + READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); - COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier)); + COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); DECODE_TAIL; } @@ -900,33 +1186,16 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { -#if 0 - struct nfsd4_compoundargs save = { - .p = argp->p, - .end = argp->end, - .rqstp = argp->rqstp, - }; - u32 ve_bmval[2]; - struct iattr ve_iattr; /* request */ - struct nfs4_acl *ve_acl; /* request */ -#endif DECODE_HEAD; if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) goto out; /* For convenience's sake, we compare raw xdr'd attributes in - * nfsd4_proc_verify; however we still decode here just to return - * correct error in case of bad xdr. */ -#if 0 - status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); - if (status == nfserr_inval) { - status = nfserrno(status); - goto out; - } -#endif + * nfsd4_proc_verify */ + READ_BUF(4); - READ32(verify->ve_attrlen); + verify->ve_attrlen = be32_to_cpup(p++); READ_BUF(verify->ve_attrlen); SAVEMEM(verify->ve_attrval, verify->ve_attrlen); @@ -937,18 +1206,18 @@ static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { int avail; - int v; int len; DECODE_HEAD; - READ_BUF(sizeof(stateid_opaque_t) + 20); - READ32(write->wr_stateid.si_generation); - COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); - READ64(write->wr_offset); - READ32(write->wr_stable_how); + status = nfsd4_decode_stateid(argp, &write->wr_stateid); + if (status) + return status; + READ_BUF(16); + p = xdr_decode_hyper(p, &write->wr_offset); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > 2) goto xdr_error; - READ32(write->wr_buflen); + write->wr_buflen = be32_to_cpup(p++); /* Sorry .. no magic macros for this.. * * READ_BUF(write->wr_buflen); @@ -960,27 +1229,26 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) __FILE__, __LINE__); goto xdr_error; } - argp->rqstp->rq_vec[0].iov_base = p; - argp->rqstp->rq_vec[0].iov_len = avail; - v = 0; - len = write->wr_buflen; - while (len > argp->rqstp->rq_vec[v].iov_len) { - len -= argp->rqstp->rq_vec[v].iov_len; - v++; - argp->rqstp->rq_vec[v].iov_base = page_address(argp->pagelist[0]); + write->wr_head.iov_base = p; + write->wr_head.iov_len = avail; + write->wr_pagelist = argp->pagelist; + + len = XDR_QUADLEN(write->wr_buflen) << 2; + if (len >= avail) { + int pages; + + len -= avail; + + pages = len >> PAGE_SHIFT; + argp->pagelist += pages; + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + + argp->p = (__be32 *)page_address(argp->pagelist[0]); argp->pagelist++; - if (argp->pagelen >= PAGE_SIZE) { - argp->rqstp->rq_vec[v].iov_len = PAGE_SIZE; - argp->pagelen -= PAGE_SIZE; - } else { - argp->rqstp->rq_vec[v].iov_len = argp->pagelen; - argp->pagelen -= len; - } + argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } - argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); - argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); - argp->rqstp->rq_vec[v].iov_len = len; - write->wr_vlen = v+1; + argp->p += XDR_QUADLEN(len); DECODE_TAIL; } @@ -990,34 +1258,381 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel { DECODE_HEAD; + if (argp->minorversion >= 1) + return nfserr_notsupp; + READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - READ32(rlockowner->rl_owner.len); + rlockowner->rl_owner.len = be32_to_cpup(p++); READ_BUF(rlockowner->rl_owner.len); READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) + return nfserr_inval; DECODE_TAIL; } static __be32 +nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + int dummy, tmp; + DECODE_HEAD; + + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); + + status = nfsd4_decode_opaque(argp, &exid->clname); + if (status) + return nfserr_bad_xdr; + + READ_BUF(4); + exid->flags = be32_to_cpup(p++); + + /* Ignore state_protect4_a */ + READ_BUF(4); + exid->spa_how = be32_to_cpup(p++); + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + + /* spo_must_allow */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + break; + case SP4_SSV: + /* ssp_ops */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + + /* ssp_hash_algs<> */ + READ_BUF(4); + tmp = be32_to_cpup(p++); + while (tmp--) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ssp_encr_algs<> */ + READ_BUF(4); + tmp = be32_to_cpup(p++); + while (tmp--) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ssp_window and ssp_num_gss_handles */ + READ_BUF(8); + dummy = be32_to_cpup(p++); + dummy = be32_to_cpup(p++); + break; + default: + goto xdr_error; + } + + /* Ignore Implementation ID */ + READ_BUF(4); /* nfs_impl_id4 array length */ + dummy = be32_to_cpup(p++); + + if (dummy > 1) + goto xdr_error; + + if (dummy == 1) { + /* nii_domain */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_name */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + + /* nii_date */ + READ_BUF(12); + p += 3; + } + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, + struct nfsd4_create_session *sess) +{ + DECODE_HEAD; + u32 dummy; + + READ_BUF(16); + COPYMEM(&sess->clientid, 8); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); + + /* Fore channel attrs */ + READ_BUF(28); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); + if (sess->fore_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); + } else if (sess->fore_channel.nr_rdma_attrs > 1) { + dprintk("Too many fore channel attr bitmaps!\n"); + goto xdr_error; + } + + /* Back channel attrs */ + READ_BUF(28); + dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); + sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); + if (sess->back_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); + } else if (sess->back_channel.nr_rdma_attrs > 1) { + dprintk("Too many back channel attr bitmaps!\n"); + goto xdr_error; + } + + READ_BUF(4); + sess->callback_prog = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &sess->cb_sec); + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_session *destroy_session) +{ + DECODE_HEAD; + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, + struct nfsd4_free_stateid *free_stateid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); + COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) +{ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); + COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + int i; + __be32 *p, status; + struct nfsd4_test_stateid_id *stateid; + + READ_BUF(4); + test_stateid->ts_num_ids = ntohl(*p++); + + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); + + for (i = 0; i < test_stateid->ts_num_ids; i++) { + stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + if (!stateid) { + status = nfserrno(-ENOMEM); + goto out; + } + + defer_free(argp, kfree, stateid); + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + + status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); + if (status) + goto out; + } + + status = 0; +out: + return status; +xdr_error: + dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); + status = nfserr_bad_xdr; + goto out; +} + +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) +{ + DECODE_HEAD; + + READ_BUF(8); + COPYMEM(&dc->clientid, 8); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) +{ + DECODE_HEAD; + + READ_BUF(4); + rc->rca_one_fs = be32_to_cpup(p++); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) +{ + return nfs_ok; +} + +static __be32 +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) +{ + return nfserr_notsupp; +} + +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); + +static nfsd4_dec nfsd4_dec_ops[] = { + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, + + /* new operations for NFSv4.1 */ + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, +}; + +static inline bool +nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) +{ + if (op->opnum < FIRST_NFS4_OP) + return false; + else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) + return false; + else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) + return false; + else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) + return false; + return true; +} + +static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; + bool cachethis = false; + int auth_slack= argp->rqstp->rq_auth_slack; + int max_reply = auth_slack + 8; /* opcnt, status */ + int readcount = 0; + int readbytes = 0; int i; - /* - * XXX: According to spec, we should check the tag - * for UTF-8 compliance. I'm postponing this for - * now because it seems that some clients do use - * binary tags. - */ READ_BUF(4); - READ32(argp->taglen); + argp->taglen = be32_to_cpup(p++); READ_BUF(argp->taglen + 8); SAVEMEM(argp->tag, argp->taglen); - READ32(argp->minorversion); - READ32(argp->opcnt); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); if (argp->taglen > NFSD4_MAX_TAGLEN) goto xdr_error; @@ -1033,399 +1648,330 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } + if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + argp->opcnt = 0; + for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; - /* - * We can't use READ_BUF() here because we need to handle - * a missing opcode as an OP_WRITE + 1. So we need to check - * to see if we're truly at the end of our buffer or if there - * is another page we need to flip to. - */ - - if (argp->p == argp->end) { - if (argp->pagelen < 4) { - /* There isn't an opcode still on the wire */ - op->opnum = OP_WRITE + 1; - op->status = nfserr_bad_xdr; - argp->opcnt = i+1; - break; - } - - /* - * False alarm. We just hit a page boundary, but there - * is still data available. Move pointer across page - * boundary. *snip from READ_BUF* - */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } - } - op->opnum = ntohl(*argp->p++); + READ_BUF(4); + op->opnum = be32_to_cpup(p++); - switch (op->opnum) { - case 2: /* Reserved operation */ - op->opnum = OP_ILLEGAL; - if (argp->minorversion == 0) - op->status = nfserr_op_illegal; - else - op->status = nfserr_minor_vers_mismatch; - break; - case OP_ACCESS: - op->status = nfsd4_decode_access(argp, &op->u.access); - break; - case OP_CLOSE: - op->status = nfsd4_decode_close(argp, &op->u.close); - break; - case OP_COMMIT: - op->status = nfsd4_decode_commit(argp, &op->u.commit); - break; - case OP_CREATE: - op->status = nfsd4_decode_create(argp, &op->u.create); - break; - case OP_DELEGRETURN: - op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn); - break; - case OP_GETATTR: - op->status = nfsd4_decode_getattr(argp, &op->u.getattr); - break; - case OP_GETFH: - op->status = nfs_ok; - break; - case OP_LINK: - op->status = nfsd4_decode_link(argp, &op->u.link); - break; - case OP_LOCK: - op->status = nfsd4_decode_lock(argp, &op->u.lock); - break; - case OP_LOCKT: - op->status = nfsd4_decode_lockt(argp, &op->u.lockt); - break; - case OP_LOCKU: - op->status = nfsd4_decode_locku(argp, &op->u.locku); - break; - case OP_LOOKUP: - op->status = nfsd4_decode_lookup(argp, &op->u.lookup); - break; - case OP_LOOKUPP: - op->status = nfs_ok; - break; - case OP_NVERIFY: - op->status = nfsd4_decode_verify(argp, &op->u.nverify); - break; - case OP_OPEN: - op->status = nfsd4_decode_open(argp, &op->u.open); - break; - case OP_OPEN_CONFIRM: - op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm); - break; - case OP_OPEN_DOWNGRADE: - op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade); - break; - case OP_PUTFH: - op->status = nfsd4_decode_putfh(argp, &op->u.putfh); - break; - case OP_PUTROOTFH: - op->status = nfs_ok; - break; - case OP_READ: - op->status = nfsd4_decode_read(argp, &op->u.read); - break; - case OP_READDIR: - op->status = nfsd4_decode_readdir(argp, &op->u.readdir); - break; - case OP_READLINK: - op->status = nfs_ok; - break; - case OP_REMOVE: - op->status = nfsd4_decode_remove(argp, &op->u.remove); - break; - case OP_RENAME: - op->status = nfsd4_decode_rename(argp, &op->u.rename); - break; - case OP_RESTOREFH: - op->status = nfs_ok; - break; - case OP_RENEW: - op->status = nfsd4_decode_renew(argp, &op->u.renew); - break; - case OP_SAVEFH: - op->status = nfs_ok; - break; - case OP_SECINFO: - op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo); - break; - case OP_SETATTR: - op->status = nfsd4_decode_setattr(argp, &op->u.setattr); - break; - case OP_SETCLIENTID: - op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm); - break; - case OP_VERIFY: - op->status = nfsd4_decode_verify(argp, &op->u.verify); - break; - case OP_WRITE: - op->status = nfsd4_decode_write(argp, &op->u.write); - break; - case OP_RELEASE_LOCKOWNER: - op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner); - break; - default: + if (nfsd4_opnum_in_range(argp, op)) + op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); + else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; - break; } + /* + * We'll try to cache the result in the DRC if any one + * op in the compound wants to be cached: + */ + cachethis |= nfsd4_cache_this_op(op); + + if (op->opnum == OP_READ) { + readcount++; + readbytes += nfsd4_max_reply(argp->rqstp, op); + } else + max_reply += nfsd4_max_reply(argp->rqstp, op); if (op->status) { argp->opcnt = i+1; break; } } + /* Sessions make the DRC unnecessary: */ + if (argp->minorversion) + cachethis = false; + svc_reserve(argp->rqstp, max_reply + readbytes); + argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + + if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) + argp->rqstp->rq_splice_ok = false; DECODE_TAIL; } -/* - * END OF "GENERIC" DECODE ROUTINES. - */ - -/* - * START OF "GENERIC" ENCODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ -#define ENCODE_HEAD __be32 *p - -#define WRITE32(n) *p++ = htonl(n) -#define WRITE64(n) do { \ - *p++ = htonl((u32)((n) >> 32)); \ - *p++ = htonl((u32)(n)); \ -} while (0) -#define WRITEMEM(ptr,nbytes) do { \ - *(p + XDR_QUADLEN(nbytes) -1) = 0; \ - memcpy(p, ptr, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define WRITECINFO(c) do { \ - *p++ = htonl(c.atomic); \ - *p++ = htonl(c.before_ctime_sec); \ - *p++ = htonl(c.before_ctime_nsec); \ - *p++ = htonl(c.after_ctime_sec); \ - *p++ = htonl(c.after_ctime_nsec); \ -} while (0) - -#define RESERVE_SPACE(nbytes) do { \ - p = resp->p; \ - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ -} while (0) -#define ADJUST_ARGS() resp->p = p - -/* - * Header routine to setup seqid operation replay cache - */ -#define ENCODE_SEQID_OP_HEAD \ - __be32 *p; \ - __be32 *save; \ - \ - save = resp->p; -/* - * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This - * is where sequence id's are incremented, and the replay cache is filled. - * Note that we increment sequence id's here, at the last moment, so we're sure - * we know whether the error to be returned is a sequence id mutating error. - */ +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) +{ + if (IS_I_VERSION(inode)) { + p = xdr_encode_hyper(p, inode->i_version); + } else { + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); + } + return p; +} -#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ - if (seqid_mutating_err(nfserr) && stateowner) { \ - stateowner->so_seqid++; \ - stateowner->so_replay.rp_status = nfserr; \ - stateowner->so_replay.rp_buflen = \ - (((char *)(resp)->p - (char *)save)); \ - memcpy(stateowner->so_replay.rp_buf, save, \ - stateowner->so_replay.rp_buflen); \ - } } while (0); +static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) +{ + *p++ = cpu_to_be32(c->atomic); + if (c->change_supported) { + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); + } else { + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); + } + return p; +} /* Encode as an array of strings the string given with components - * seperated @sep. + * separated @sep, escaped with esc_enter and esc_exit. */ -static __be32 nfsd4_encode_components(char sep, char *components, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, + char *components, char esc_enter, + char esc_exit) { - __be32 *p = *pp; - __be32 *countp = p; + __be32 *p; + __be32 pathlen; + int pathlen_offset; int strlen, count=0; - char *str, *end; + char *str, *end, *next; dprintk("nfsd4_encode_components(%s)\n", components); - if ((*buflen -= 4) < 0) + + pathlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(0); /* We will fill this in with @count later */ + p++; /* We will fill this in with @count later */ + end = str = components; while (*end) { - for (; *end && (*end != sep); end++) - ; /* Point to end of component */ + bool found_esc = false; + + /* try to parse as esc_start, ..., esc_end, sep */ + if (*str == esc_enter) { + for (; *end && (*end != esc_exit); end++) + /* find esc_exit or end of string */; + next = end + 1; + if (*end && (!*next || *next == sep)) { + str++; + found_esc = true; + } + } + + if (!found_esc) + for (; *end && (*end != sep); end++) + /* find sep or end of string */; + strlen = end - str; if (strlen) { - if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) + p = xdr_reserve_space(xdr, strlen + 4); + if (!p) return nfserr_resource; - WRITE32(strlen); - WRITEMEM(str, strlen); + p = xdr_encode_opaque(p, str, strlen); count++; } else end++; str = end; } - *pp = p; - p = countp; - WRITE32(count); + pathlen = htonl(xdr->buf->len - pathlen_offset); + write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4); return 0; } +/* Encode as an array of strings the string given with components + * separated @sep. + */ +static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep, + char *components) +{ + return nfsd4_encode_components_esc(xdr, sep, components, 0, 0); +} + /* * encode a location element of a fs_locations structure */ -static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr, + struct nfsd4_fs_location *location) { __be32 status; - __be32 *p = *pp; - status = nfsd4_encode_components(':', location->hosts, &p, buflen); + status = nfsd4_encode_components_esc(xdr, ':', location->hosts, + '[', ']'); if (status) return status; - status = nfsd4_encode_components('/', location->path, &p, buflen); + status = nfsd4_encode_components(xdr, '/', location->path); if (status) return status; - *pp = p; return 0; } /* - * Return the path to an export point in the pseudo filesystem namespace - * Returned string is safe to use as long as the caller holds a reference - * to @exp. + * Encode a path in RFC3530 'pathname4' format */ -static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) +static __be32 nfsd4_encode_path(struct xdr_stream *xdr, + const struct path *root, + const struct path *path) { - struct svc_fh tmp_fh; - char *path, *rootpath; - - fh_init(&tmp_fh, NFS4_FHSIZE); - *stat = exp_pseudoroot(rqstp, &tmp_fh); - if (*stat) - return NULL; - rootpath = tmp_fh.fh_export->ex_pathname; + struct path cur = *path; + __be32 *p; + struct dentry **components = NULL; + unsigned int ncomponents = 0; + __be32 err = nfserr_jukebox; - path = exp->ex_pathname; + dprintk("nfsd4_encode_components("); - if (strncmp(path, rootpath, strlen(rootpath))) { - dprintk("nfsd: fs_locations failed;" - "%s is not contained in %s\n", path, rootpath); - *stat = nfserr_notsupp; - return NULL; + path_get(&cur); + /* First walk the path up to the nfsd root, and store the + * dentries/path components in an array. + */ + for (;;) { + if (cur.dentry == root->dentry && cur.mnt == root->mnt) + break; + if (cur.dentry == cur.mnt->mnt_root) { + if (follow_up(&cur)) + continue; + goto out_free; + } + if ((ncomponents & 15) == 0) { + struct dentry **new; + new = krealloc(components, + sizeof(*new) * (ncomponents + 16), + GFP_KERNEL); + if (!new) + goto out_free; + components = new; + } + components[ncomponents++] = cur.dentry; + cur.dentry = dget_parent(cur.dentry); + } + err = nfserr_resource; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_free; + *p++ = cpu_to_be32(ncomponents); + + while (ncomponents) { + struct dentry *dentry = components[ncomponents - 1]; + unsigned int len; + + spin_lock(&dentry->d_lock); + len = dentry->d_name.len; + p = xdr_reserve_space(xdr, len + 4); + if (!p) { + spin_unlock(&dentry->d_lock); + goto out_free; + } + p = xdr_encode_opaque(p, dentry->d_name.name, len); + dprintk("/%s", dentry->d_name.name); + spin_unlock(&dentry->d_lock); + dput(dentry); + ncomponents--; } - return path + strlen(rootpath); + err = 0; +out_free: + dprintk(")\n"); + while (ncomponents) + dput(components[--ncomponents]); + kfree(components); + path_put(&cur); + return err; +} + +static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr, + struct svc_rqst *rqstp, const struct path *path) +{ + struct svc_export *exp_ps; + __be32 res; + + exp_ps = rqst_find_fsidzero_export(rqstp); + if (IS_ERR(exp_ps)) + return nfserrno(PTR_ERR(exp_ps)); + res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path); + exp_put(exp_ps); + return res; } /* * encode a fs_locations structure */ -static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, - struct svc_export *exp, - __be32 **pp, int *buflen) +static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr, + struct svc_rqst *rqstp, struct svc_export *exp) { __be32 status; int i; - __be32 *p = *pp; + __be32 *p; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - char *root = nfsd4_path(rqstp, exp, &status); + status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path); if (status) return status; - status = nfsd4_encode_components('/', root, &p, buflen); - if (status) - return status; - if ((*buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; - WRITE32(fslocs->locations_count); + *p++ = cpu_to_be32(fslocs->locations_count); for (i=0; i<fslocs->locations_count; i++) { - status = nfsd4_encode_fs_location4(&fslocs->locations[i], - &p, buflen); + status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]); if (status) return status; } - *pp = p; return 0; } -static u32 nfs4_ftypes[16] = { - NF4BAD, NF4FIFO, NF4CHR, NF4BAD, - NF4DIR, NF4BAD, NF4BLK, NF4BAD, - NF4REG, NF4BAD, NF4LNK, NF4BAD, - NF4SOCK, NF4BAD, NF4LNK, NF4BAD, -}; - -static __be32 -nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) +static u32 nfs4_file_type(umode_t mode) { - int status; - - if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) - return nfserr_resource; - if (whotype != NFS4_ACL_WHO_NAMED) - status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); - else if (group) - status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); - else - status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); - if (status < 0) - return nfserrno(status); - *p = xdr_encode_opaque(*p, NULL, status); - *buflen -= (XDR_QUADLEN(status) << 2) + 4; - BUG_ON(*buflen < 0); - return 0; + switch (mode & S_IFMT) { + case S_IFIFO: return NF4FIFO; + case S_IFCHR: return NF4CHR; + case S_IFDIR: return NF4DIR; + case S_IFBLK: return NF4BLK; + case S_IFLNK: return NF4LNK; + case S_IFREG: return NF4REG; + case S_IFSOCK: return NF4SOCK; + default: return NF4BAD; + }; } static inline __be32 -nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) +nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp, + struct nfs4_ace *ace) { - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); + if (ace->whotype != NFS4_ACL_WHO_NAMED) + return nfs4_acl_write_who(xdr, ace->whotype); + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + return nfsd4_encode_group(xdr, rqstp, ace->who_gid); + else + return nfsd4_encode_user(xdr, rqstp, ace->who_uid); } -static inline __be32 -nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) -{ - return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); -} +#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ + FATTR4_WORD0_RDATTR_ERROR) +#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 -nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - __be32 **p, int *buflen) +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) { - return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); -} + __be32 *p; -#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ - FATTR4_WORD0_RDATTR_ERROR) -#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID + p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + if (!p) + return nfserr_resource; + + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + *p++ = cpu_to_be32(0); /* lfs */ + *p++ = cpu_to_be32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, + void *context, int len) +{ return 0; } +#endif static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { @@ -1443,61 +1989,93 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) return 0; } + +static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +{ + struct path path = exp->ex_path; + int err; + + path_get(&path); + while (follow_up(&path)) { + if (path.dentry != path.mnt->mnt_root) + break; + } + err = vfs_getattr(&path, stat); + path_put(&path); + return err; +} + /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. - * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. */ -__be32 -nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, +static __be32 +nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + struct svc_export *exp, + struct dentry *dentry, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; u32 bmval1 = bmval[1]; + u32 bmval2 = bmval[2]; struct kstat stat; - struct svc_fh tempfh; + struct svc_fh *tempfh = NULL; struct kstatfs statfs; - int buflen = *countp << 2; - __be32 *attrlenp; + __be32 *p; + int starting_len = xdr->buf->len; + int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; __be32 status; int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; + struct nfsd4_compoundres *resp = rqstp->rq_resp; + u32 minorversion = resp->cstate.minorversion; + struct path path = { + .mnt = exp->ex_path.mnt, + .dentry = dentry, + }; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); - BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); - BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); + BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); + BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); + BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); if (exp->ex_fslocs.migrated) { + BUG_ON(bmval[2]); status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); if (status) goto out; } - err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); + err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { - err = vfs_statfs(dentry, &statfs); + err = vfs_statfs(&path, &statfs); if (err) goto out_nfserr; } if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { - fh_init(&tempfh, NFS4_FHSIZE); - status = fh_compose(&tempfh, exp, dentry, NULL); + tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + status = nfserr_jukebox; + if (!tempfh) + goto out; + fh_init(tempfh, NFS4_FHSIZE); + status = fh_compose(tempfh, exp, dentry, NULL); if (status) goto out; - fhp = &tempfh; + fhp = tempfh; } if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_SUPPORTED_ATTRS)) { @@ -1513,363 +2091,492 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, goto out_nfserr; } } - if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - if (exp->ex_fslocs.locations == NULL) { - bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS; + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; } } - if ((buflen -= 16) < 0) - goto out_resource; +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ - WRITE32(2); - WRITE32(bmval0); - WRITE32(bmval1); - attrlenp = p++; /* to be backfilled later */ + if (bmval2) { + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + *p++ = cpu_to_be32(bmval2); + } else if (bmval1) { + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bmval0); + *p++ = cpu_to_be32(bmval1); + } else { + p = xdr_reserve_space(xdr, 8); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(bmval0); + } + + attrlen_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; - if ((buflen -= 12) < 0) - goto out_resource; + u32 word0 = nfsd_suppattrs0(minorversion); + u32 word1 = nfsd_suppattrs1(minorversion); + u32 word2 = nfsd_suppattrs2(minorversion); + if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; - if (!exp->ex_fslocs.locations) - word0 &= ~FATTR4_WORD0_FS_LOCATIONS; - WRITE32(2); - WRITE32(word0); - WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; + if (!word2) { + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + } else { + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(word0); + *p++ = cpu_to_be32(word1); + *p++ = cpu_to_be32(word2); + } } if (bmval0 & FATTR4_WORD0_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12]; - if (dummy == NF4BAD) - goto out_serverfault; - WRITE32(dummy); + dummy = nfs4_file_type(stat.mode); + if (dummy == NF4BAD) { + status = nfserr_serverfault; + goto out; + } + *p++ = cpu_to_be32(dummy); } if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_PERSISTENT); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); + *p++ = cpu_to_be32(NFS4_FH_PERSISTENT| + NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - /* - * Note: This _must_ be consistent with the scheme for writing - * change_info, so any changes made here must be reflected there - * as well. (See xdr4.h:set_change_info() and the WRITECINFO() - * macro above.) - */ - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + p = encode_change(p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.size); + p = xdr_encode_hyper(p, stat.size); } if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_FSID) { - if ((buflen -= 16) < 0) + p = xdr_reserve_space(xdr, 16); + if (!p) goto out_resource; if (exp->ex_fslocs.migrated) { - WRITE64(NFS4_REFERRAL_FSID_MAJOR); - WRITE64(NFS4_REFERRAL_FSID_MINOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR); + p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR); } else switch(fsid_source(fhp)) { case FSIDSOURCE_FSID: - WRITE64((u64)exp->ex_fsid); - WRITE64((u64)0); + p = xdr_encode_hyper(p, (u64)exp->ex_fsid); + p = xdr_encode_hyper(p, (u64)0); break; case FSIDSOURCE_DEV: - WRITE32(0); - WRITE32(MAJOR(stat.dev)); - WRITE32(0); - WRITE32(MINOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MAJOR(stat.dev)); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(MINOR(stat.dev)); break; case FSIDSOURCE_UUID: - WRITEMEM(exp->ex_uuid, 16); + p = xdr_encode_opaque_fixed(p, exp->ex_uuid, + EX_UUID_LEN); break; } } if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(NFSD_LEASE_TIME); + *p++ = cpu_to_be32(nn->nfsd4_lease); } if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(rdattr_err); + *p++ = cpu_to_be32(rdattr_err); } if (bmval0 & FATTR4_WORD0_ACL) { struct nfs4_ace *ace; if (acl == NULL) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(0); + *p++ = cpu_to_be32(0); goto out_acl; } - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(acl->naces); + *p++ = cpu_to_be32(acl->naces); for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) { - if ((buflen -= 4*3) < 0) - goto out_resource; - WRITE32(ace->type); - WRITE32(ace->flag); - WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); - status = nfsd4_encode_aclname(rqstp, ace->whotype, - ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, - &p, &buflen); - if (status == nfserr_resource) + p = xdr_reserve_space(xdr, 4*3); + if (!p) goto out_resource; + *p++ = cpu_to_be32(ace->type); + *p++ = cpu_to_be32(ace->flag); + *p++ = cpu_to_be32(ace->access_mask & + NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(xdr, rqstp, ace); if (status) goto out; } } out_acl: if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(aclsupport ? + *p++ = cpu_to_be32(aclsupport ? ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); } if (bmval0 & FATTR4_WORD0_CANSETTIME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_FILEHANDLE) { - buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; - if (buflen < 0) + p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); + if (!p) goto out_resource; - WRITE32(fhp->fh_handle.fh_size); - WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(stat.ino); + p = xdr_encode_hyper(p, stat.ino); } if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_ffree); + p = xdr_encode_hyper(p, (u64) statfs.f_ffree); } if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) statfs.f_files); + p = xdr_encode_hyper(p, (u64) statfs.f_files); } if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) { - status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_fs_locations(xdr, rqstp, exp); if (status) goto out; } if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64(~(u64)0); + p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes); } if (bmval0 & FATTR4_WORD0_MAXLINK) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(255); + *p++ = cpu_to_be32(255); } if (bmval0 & FATTR4_WORD0_MAXNAME) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(statfs.f_namelen); + *p++ = cpu_to_be32(statfs.f_namelen); } if (bmval0 & FATTR4_WORD0_MAXREAD) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval0 & FATTR4_WORD0_MAXWRITE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE64((u64) svc_max_payload(rqstp)); + p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp)); } if (bmval1 & FATTR4_WORD1_MODE) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.mode & S_IALLUGO); + *p++ = cpu_to_be32(stat.mode & S_IALLUGO); } if (bmval1 & FATTR4_WORD1_NO_TRUNC) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(1); + *p++ = cpu_to_be32(1); } if (bmval1 & FATTR4_WORD1_NUMLINKS) { - if ((buflen -= 4) < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; - WRITE32(stat.nlink); + *p++ = cpu_to_be32(stat.nlink); } if (bmval1 & FATTR4_WORD1_OWNER) { - status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_user(xdr, rqstp, stat.uid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { - status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); - if (status == nfserr_resource) - goto out_resource; + status = nfsd4_encode_group(xdr, rqstp, stat.gid); if (status) goto out; } if (bmval1 & FATTR4_WORD1_RAWDEV) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; - WRITE32((u32) MAJOR(stat.rdev)); - WRITE32((u32) MINOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MAJOR(stat.rdev)); + *p++ = cpu_to_be32((u32) MINOR(stat.rdev)); } if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_FREE) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_SPACE_USED) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; dummy64 = (u64)stat.blocks << 9; - WRITE64(dummy64); + p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); - WRITE32(stat.atime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(1); - WRITE32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(1); + *p++ = cpu_to_be32(0); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - if ((buflen -= 12) < 0) + p = xdr_reserve_space(xdr, 12); + if (!p) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); - WRITE32(stat.mtime.tv_nsec); + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - if ((buflen -= 8) < 0) + p = xdr_reserve_space(xdr, 8); + if (!p) goto out_resource; /* * Get parent's attributes if not ignoring crossmount * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && - exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) { - err = vfs_getattr(exp->ex_path.mnt->mnt_parent, - exp->ex_path.mnt->mnt_mountpoint, &stat); - if (err) - goto out_nfserr; - } - WRITE64(stat.ino); + dentry == exp->ex_path.mnt->mnt_root) + get_parent_attributes(exp, &stat); + p = xdr_encode_hyper(p, stat.ino); + } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + status = nfsd4_encode_security_label(xdr, rqstp, context, + contextlen); + if (status) + goto out; } - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + p = xdr_reserve_space(xdr, 16); + if (!p) + goto out_resource; + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1); + *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2); + } + + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (context) + security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); - if (fhp == &tempfh) - fh_put(&tempfh); + if (tempfh) { + fh_put(tempfh); + kfree(tempfh); + } + if (status) + xdr_truncate_encode(xdr, starting_len); return status; out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; -out_serverfault: - status = nfserr_serverfault; - goto out; +} + +static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr, + struct xdr_buf *buf, __be32 *p, int bytes) +{ + xdr->scratch.iov_len = 0; + memset(buf, 0, sizeof(struct xdr_buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = 0; + buf->len = 0; + xdr->buf = buf; + xdr->iov = buf->head; + xdr->p = p; + xdr->end = (void *)p + bytes; + buf->buflen = bytes; +} + +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *bmval, + struct svc_rqst *rqstp, int ignore_crossmnt) +{ + struct xdr_buf dummy; + struct xdr_stream xdr; + __be32 ret; + + svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); + ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp, + ignore_crossmnt); + *p = xdr.p; + return ret; +} + +static inline int attributes_need_mount(u32 *bmval) +{ + if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME)) + return 1; + if (bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) + return 1; + return 0; } static __be32 -nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) +nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd, + const char *name, int namlen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -1879,6 +2586,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); + if (!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } exp_get(exp); /* @@ -1888,13 +2604,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, * we will not follow the cross mount and will fill the attribtutes * directly from the mountpoint dentry. */ - if (d_mountpoint(dentry) && - (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 && - (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0) - ignore_crossmnt = 1; - else if (d_mountpoint(dentry)) { + if (nfsd_mountpoint(dentry, exp)) { int err; + if (!(exp->ex_flags & NFSEXP_V4ROOT) + && !attributes_need_mount(cd->rd_bmval)) { + ignore_crossmnt = 1; + goto out_encode; + } /* * Why the heck aren't we just using nfsd_lookup?? * Different "."/".." handling? Something else? @@ -1910,7 +2627,8 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, goto out_put; } - nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, +out_encode: + nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval, cd->rd_rqstp, ignore_crossmnt); out_put: dput(dentry); @@ -1919,19 +2637,19 @@ out_put: } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) +nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr) { - __be32 *attrlenp; + __be32 *p; - if (buflen < 6) + p = xdr_reserve_space(xdr, 20); + if (!p) return NULL; *p++ = htonl(2); *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ *p++ = htonl(0); /* bmval1 */ - attrlenp = p++; + *p++ = htonl(4); /* attribute length */ *p++ = nfserr; /* no htonl */ - *attrlenp = htonl((char *)p - (char *)attrlenp - 4); return p; } @@ -1941,9 +2659,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, { struct readdir_cd *ccd = ccdv; struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); - int buflen; - __be32 *p = cd->buffer; + struct xdr_stream *xdr = cd->xdr; + int start_offset = xdr->buf->len; + int cookie_offset; + int entry_bytes; __be32 nfserr = nfserr_toosmall; + __be64 wire_offset; + __be32 *p; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -1951,28 +2673,33 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, return 0; } - if (cd->offset) - xdr_encode_hyper(cd->offset, (u64) offset); + if (cd->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset, + &wire_offset, 8); + } - buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); - if (buflen < 0) + p = xdr_reserve_space(xdr, 4); + if (!p) goto fail; - *p++ = xdr_one; /* mark entry present */ - cd->offset = p; /* remember pointer */ + cookie_offset = xdr->buf->len; + p = xdr_reserve_space(xdr, 3*4 + namlen); + if (!p) + goto fail; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; goto fail; - case nfserr_dropit: - goto fail; + case nfserr_noent: + xdr_truncate_encode(xdr, start_offset); + goto skip_entry; default: /* * If the client requested the RDATTR_ERROR attribute, @@ -1983,391 +2710,580 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, */ if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) goto fail; - p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + p = nfsd4_encode_rdattr_error(xdr, nfserr); if (p == NULL) { nfserr = nfserr_toosmall; goto fail; } } - cd->buflen -= (p - cd->buffer); - cd->buffer = p; + nfserr = nfserr_toosmall; + entry_bytes = xdr->buf->len - start_offset; + if (entry_bytes > cd->rd_maxcount) + goto fail; + cd->rd_maxcount -= entry_bytes; + if (!cd->rd_dircount) + goto fail; + cd->rd_dircount--; + cd->cookie_offset = cookie_offset; +skip_entry: cd->common.err = nfs_ok; return 0; fail: + xdr_truncate_encode(xdr, start_offset); cd->common.err = nfserr; return -EINVAL; } -static void +static __be32 +nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(stateid_t)); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sid->si_generation); + p = xdr_encode_opaque_fixed(p, &sid->si_opaque, + sizeof(stateid_opaque_t)); + return 0; +} + +static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITE32(access->ac_supported); - WRITE32(access->ac_resp_access); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); } + return nfserr; } -static void -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(close->cl_stateid.si_generation); - WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); + /* Sorry, we do not yet support RDMA over 4.1: */ + *p++ = cpu_to_be32(0); } - ENCODE_SEQID_OP_TAIL(close->cl_stateowner); + return nfserr; +} + +static __be32 +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) +{ + struct xdr_stream *xdr = &resp->xdr; + + if (!nfserr) + nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); + + return nfserr; } -static void +static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITEMEM(commit->co_verf.data, 8); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + NFS4_VERIFIER_SIZE); } + return nfserr; } -static void +static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(32); - WRITECINFO(create->cr_cinfo); - WRITE32(2); - WRITE32(create->cr_bmval[0]); - WRITE32(create->cr_bmval[1]); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &create->cr_cinfo); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(create->cr_bmval[0]); + *p++ = cpu_to_be32(create->cr_bmval[1]); } + return nfserr; } static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - int buflen; + struct xdr_stream *xdr = &resp->xdr; if (nfserr) return nfserr; - buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); - nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } -static void -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp) +static __be32 +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { + struct xdr_stream *xdr = &resp->xdr; + struct svc_fh *fhp = *fhpp; unsigned int len; - ENCODE_HEAD; + __be32 *p; if (!nfserr) { len = fhp->fh_handle.fh_size; - RESERVE_SPACE(len + 4); - WRITE32(len); - WRITEMEM(&fhp->fh_handle.fh_base, len); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); } + return nfserr; } /* * Including all fields other than the name, a LOCK4denied structure requires * 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. */ -static void -nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) -{ - ENCODE_HEAD; - - RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); - WRITE64(ld->ld_start); - WRITE64(ld->ld_length); - WRITE32(ld->ld_type); - if (ld->ld_sop) { - WRITEMEM(&ld->ld_clientid, 8); - WRITE32(ld->ld_sop->so_owner.len); - WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len); - kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner); +static __be32 +nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld) +{ + struct xdr_netobj *conf = &ld->ld_owner; + __be32 *p; + +again: + p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len)); + if (!p) { + /* + * Don't fail to return the result just because we can't + * return the conflicting open: + */ + if (conf->len) { + kfree(conf->data); + conf->len = 0; + conf->data = NULL; + goto again; + } + return nfserr_resource; + } + p = xdr_encode_hyper(p, ld->ld_start); + p = xdr_encode_hyper(p, ld->ld_length); + *p++ = cpu_to_be32(ld->ld_type); + if (conf->len) { + p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); + p = xdr_encode_opaque(p, conf->data, conf->len); + kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ - WRITE64((u64)0); /* clientid */ - WRITE32(0); /* length of owner name */ + p = xdr_encode_hyper(p, (u64)0); /* clientid */ + *p++ = cpu_to_be32(0); /* length of owner name */ } - ADJUST_ARGS(); + return nfserr_denied; } -static void +static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) { - RESERVE_SPACE(4 + sizeof(stateid_t)); - WRITE32(lock->lk_resp_stateid.si_generation); - WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } else if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lock->lk_denied); + if (!nfserr) + nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); + else if (nfserr == nfserr_denied) + nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); + return nfserr; } -static void +static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { + struct xdr_stream *xdr = &resp->xdr; + if (nfserr == nfserr_denied) - nfsd4_encode_lock_denied(resp, &lockt->lt_denied); + nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); + return nfserr; } -static void +static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(locku->lu_stateid.si_generation); - WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } - - ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); + if (!nfserr) + nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); + + return nfserr; } -static void +static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - WRITECINFO(link->li_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); } + return nfserr; } -static void +static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - ENCODE_SEQID_OP_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (nfserr) goto out; - RESERVE_SPACE(36 + sizeof(stateid_t)); - WRITE32(open->op_stateid.si_generation); - WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); - WRITECINFO(open->op_cinfo); - WRITE32(open->op_rflags); - WRITE32(2); - WRITE32(open->op_bmval[0]); - WRITE32(open->op_bmval[1]); - WRITE32(open->op_delegate_type); - ADJUST_ARGS(); + nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); + if (nfserr) + goto out; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &open->op_cinfo); + *p++ = cpu_to_be32(open->op_rflags); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(open->op_bmval[0]); + *p++ = cpu_to_be32(open->op_bmval[1]); + *p++ = cpu_to_be32(open->op_delegate_type); switch (open->op_delegate_type) { case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - RESERVE_SPACE(20 + sizeof(stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); - WRITE32(open->op_recall); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_recall); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ break; case NFS4_OPEN_DELEGATE_WRITE: - RESERVE_SPACE(32 + sizeof(stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); - WRITE32(0); + nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid); + if (nfserr) + return nfserr; + p = xdr_reserve_space(xdr, 32); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* * TODO: space_limit's in delegations */ - WRITE32(NFS4_LIMIT_SIZE); - WRITE32(~(u32)0); - WRITE32(~(u32)0); + *p++ = cpu_to_be32(NFS4_LIMIT_SIZE); + *p++ = cpu_to_be32(~(u32)0); + *p++ = cpu_to_be32(~(u32)0); /* * TODO: ACE's in delegations */ - WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); - WRITE32(0); - WRITE32(0); - WRITE32(0); /* XXX: is NULL principal ok? */ - ADJUST_ARGS(); + *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */ + break; + case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ + switch (open->op_why_no_deleg) { + case WND4_CONTENTION: + case WND4_RESOURCE: + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); + /* deleg signaling not supported yet: */ + *p++ = cpu_to_be32(0); + break; + default: + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(open->op_why_no_deleg); + } break; default: BUG(); } /* XXX save filehandle here */ out: - ENCODE_SEQID_OP_TAIL(open->op_stateowner); + return nfserr; } -static void +static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - ENCODE_SEQID_OP_HEAD; - - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(oc->oc_resp_stateid.si_generation); - WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } + struct xdr_stream *xdr = &resp->xdr; - ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); + if (!nfserr) + nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); + + return nfserr; } -static void +static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - ENCODE_SEQID_OP_HEAD; - - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(od->od_stateid.si_generation); - WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } + struct xdr_stream *xdr = &resp->xdr; + + if (!nfserr) + nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - ENCODE_SEQID_OP_TAIL(od->od_stateowner); + return nfserr; } -static __be32 -nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_read *read) +static __be32 nfsd4_encode_splice_read( + struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) { + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = xdr->buf; u32 eof; - int v, pn; - unsigned long maxcount; - long len; - ENCODE_HEAD; + int space_left; + __be32 nfserr; + __be32 *p = xdr->p - 2; - if (nfserr) - return nfserr; - if (resp->xbuf->page_len) + /* + * Don't inline pages unless we know there's room for eof, + * count, and possible padding: + */ + if (xdr->end - xdr->p < 3) return nfserr_resource; - RESERVE_SPACE(8); /* eof flag and byte count */ + nfserr = nfsd_splice_read(read->rd_rqstp, file, + read->rd_offset, &maxcount); + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + buf->page_len = 0; + return nfserr; + } - maxcount = svc_max_payload(resp->rqstp); - if (maxcount > read->rd_length) - maxcount = read->rd_length; + eof = (read->rd_offset + maxcount >= + read->rd_fhp->fh_dentry->d_inode->i_size); + + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); + + buf->page_len = maxcount; + buf->len += maxcount; + xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Use rest of head for padding and remaining ops: */ + buf->tail[0].iov_base = xdr->p; + buf->tail[0].iov_len = 0; + xdr->iov = buf->tail; + if (maxcount&3) { + int pad = 4 - (maxcount&3); + + *(xdr->p++) = 0; + + buf->tail[0].iov_base += maxcount&3; + buf->tail[0].iov_len = pad; + buf->len += pad; + } + + space_left = min_t(int, (void *)xdr->end - (void *)xdr->p, + buf->buflen - buf->len); + buf->buflen = buf->len + space_left; + xdr->end = (__be32 *)((void *)xdr->end + space_left); + + return 0; +} + +static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + struct file *file, unsigned long maxcount) +{ + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int v; + int starting_len = xdr->buf->len - 8; + long len; + int thislen; + __be32 nfserr; + __be32 tmp; + __be32 *p; + u32 zzz = 0; + int pad; len = maxcount; v = 0; - while (len > 0) { - pn = resp->rqstp->rq_resused++; - resp->rqstp->rq_vec[v].iov_base = - page_address(resp->rqstp->rq_respages[pn]); - resp->rqstp->rq_vec[v].iov_len = - len < PAGE_SIZE ? len : PAGE_SIZE; + + thislen = (void *)xdr->end - (void *)xdr->p; + if (len < thislen) + thislen = len; + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; + v++; + len -= thislen; + + while (len) { + thislen = min_t(long, len, PAGE_SIZE); + p = xdr_reserve_space(xdr, (thislen+3)&~3); + WARN_ON_ONCE(!p); + resp->rqstp->rq_vec[v].iov_base = p; + resp->rqstp->rq_vec[v].iov_len = thislen; v++; - len -= PAGE_SIZE; + len -= thislen; } read->rd_vlen = v; - nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, - read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, - &maxcount); - - if (nfserr == nfserr_symlink) - nfserr = nfserr_inval; + nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec, + read->rd_vlen, &maxcount); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); - WRITE32(eof); - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); - } + pad = (maxcount&3) ? 4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &zzz, pad); return 0; + } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) { - int maxcount; - char *page; - ENCODE_HEAD; + unsigned long maxcount; + struct xdr_stream *xdr = &resp->xdr; + struct file *file = read->rd_filp; + int starting_len = xdr->buf->len; + struct raparms *ra; + __be32 *p; + __be32 err; if (nfserr) return nfserr; - if (resp->xbuf->page_len) + + p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ + if (!p) { + WARN_ON_ONCE(resp->rqstp->rq_splice_ok); return nfserr_resource; + } + if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) { + WARN_ON_ONCE(1); + return nfserr_resource; + } + xdr_commit_encode(xdr); + + maxcount = svc_max_payload(resp->rqstp); + if (maxcount > xdr->buf->buflen - xdr->buf->len) + maxcount = xdr->buf->buflen - xdr->buf->len; + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + if (!read->rd_filp) { + err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, + &file, &ra); + if (err) + goto err_truncate; + } + + if (file->f_op->splice_read && resp->rqstp->rq_splice_ok) + err = nfsd4_encode_splice_read(resp, read, file, maxcount); + else + err = nfsd4_encode_readv(resp, read, file, maxcount); - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + if (!read->rd_filp) + nfsd_put_tmp_read_open(file, ra); +err_truncate: + if (err) + xdr_truncate_encode(xdr, starting_len); + return err; +} + +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) +{ + int maxcount; + __be32 wire_count; + int zero = 0; + struct xdr_stream *xdr = &resp->xdr; + int length_offset = xdr->buf->len; + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; maxcount = PAGE_SIZE; - RESERVE_SPACE(4); + p = xdr_reserve_space(xdr, maxcount); + if (!p) + return nfserr_resource; /* * XXX: By default, the ->readlink() VFS op will truncate symlinks * if they would overflow the buffer. Is this kosher in NFSv4? If * not, one easy fix is: if ->readlink() precisely fills the buffer, * assume that truncation occurred, and return NFS4ERR_RESOURCE. */ - nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, + (char *)p, &maxcount); if (nfserr == nfserr_isdir) - return nfserr_inval; - if (nfserr) + nfserr = nfserr_inval; + if (nfserr) { + xdr_truncate_encode(xdr, length_offset); return nfserr; - - WRITE32(maxcount); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = (char*)p - - (char*)resp->xbuf->head[0].iov_base; - resp->xbuf->page_len = maxcount; - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = p; - resp->xbuf->tail[0].iov_len = 0; - if (maxcount&3) { - RESERVE_SPACE(4); - WRITE32(0); - resp->xbuf->tail[0].iov_base += maxcount&3; - resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); - ADJUST_ARGS(); } + + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); return 0; } @@ -2375,45 +3291,52 @@ static __be32 nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; + int bytes_left; loff_t offset; - __be32 *page, *savep, *tailbase; - ENCODE_HEAD; + __be64 wire_offset; + struct xdr_stream *xdr = &resp->xdr; + int starting_len = xdr->buf->len; + __be32 *p; if (nfserr) return nfserr; - if (resp->xbuf->page_len) - return nfserr_resource; - RESERVE_SPACE(8); /* verifier */ - savep = p; + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); - resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; - tailbase = p; - - maxcount = PAGE_SIZE; - if (maxcount > readdir->rd_maxcount) - maxcount = readdir->rd_maxcount; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; /* - * Convert from bytes to words, account for the two words already - * written, make sure to leave two words at the end for the next - * pointer and eof field. + * Number of bytes left for directory entries allowing for the + * final 8 bytes of the readdir and a following failed op: + */ + bytes_left = xdr->buf->buflen - xdr->buf->len + - COMPOUND_ERR_SLACK_SPACE - 8; + if (bytes_left < 0) { + nfserr = nfserr_resource; + goto err_no_verf; + } + maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX); + /* + * Note the rfc defines rd_maxcount as the size of the + * READDIR4resok structure, which includes the verifier above + * and the 8 bytes encoded at the end of this function: */ - maxcount = (maxcount >> 2) - 4; - if (maxcount < 0) { - nfserr = nfserr_toosmall; + if (maxcount < 16) { + nfserr = nfserr_toosmall; goto err_no_verf; } + maxcount = min_t(int, maxcount-16, bytes_left); - page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); + readdir->xdr = xdr; + readdir->rd_maxcount = maxcount; readdir->common.err = 0; - readdir->buflen = maxcount; - readdir->buffer = page; - readdir->offset = NULL; + readdir->cookie_offset = 0; offset = readdir->rd_cookie; nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, @@ -2421,73 +3344,82 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 &readdir->common, nfsd4_encode_dirent); if (nfserr == nfs_ok && readdir->common.err == nfserr_toosmall && - readdir->buffer == page) - nfserr = nfserr_toosmall; - if (nfserr == nfserr_symlink) - nfserr = nfserr_notdir; + xdr->buf->len == starting_len + 8) { + /* nothing encoded; which limit did we hit?: */ + if (maxcount - 16 < bytes_left) + /* It was the fault of rd_maxcount: */ + nfserr = nfserr_toosmall; + else + /* We ran out of buffer space: */ + nfserr = nfserr_resource; + } if (nfserr) goto err_no_verf; - if (readdir->offset) - xdr_encode_hyper(readdir->offset, offset); + if (readdir->cookie_offset) { + wire_offset = cpu_to_be64(offset); + write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, + &wire_offset, 8); + } - p = readdir->buffer; + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + goto err_no_verf; + } *p++ = 0; /* no more entries */ *p++ = htonl(readdir->common.err == nfserr_eof); - resp->xbuf->page_len = ((char*)p) - (char*)page_address( - resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); - - /* Use rest of head for padding and remaining ops: */ - resp->xbuf->tail[0].iov_base = tailbase; - resp->xbuf->tail[0].iov_len = 0; - resp->p = resp->xbuf->tail[0].iov_base; - resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4; return 0; err_no_verf: - p = savep; - ADJUST_ARGS(); + xdr_truncate_encode(xdr, starting_len); return nfserr; } -static void +static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(20); - WRITECINFO(remove->rm_cinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); } + return nfserr; } -static void +static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(40); - WRITECINFO(rename->rn_sinfo); - WRITECINFO(rename->rn_tinfo); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); } + return nfserr; } -static void -nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - struct nfsd4_secinfo *secinfo) +static __be32 +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, + __be32 nfserr, struct svc_export *exp) { - int i = 0; - struct svc_export *exp = secinfo->si_exp; - u32 nflavs; + u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - ENCODE_HEAD; + __be32 *p, *flavorsp; + static bool report = true; if (nfserr) goto out; + nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -2508,206 +3440,496 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, } } - RESERVE_SPACE(4); - WRITE32(nflavs); - ADJUST_ARGS(); + supported = 0; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + flavorsp = p++; /* to be backfilled later */ + for (i = 0; i < nflavs; i++) { - u32 flav = flavs[i].pseudoflavor; - struct gss_api_mech *gm = gss_mech_get_by_pseudoflavor(flav); - - if (gm) { - RESERVE_SPACE(4); - WRITE32(RPC_AUTH_GSS); - ADJUST_ARGS(); - RESERVE_SPACE(4 + gm->gm_oid.len); - WRITE32(gm->gm_oid.len); - WRITEMEM(gm->gm_oid.data, gm->gm_oid.len); - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(0); /* qop */ - ADJUST_ARGS(); - RESERVE_SPACE(4); - WRITE32(gss_pseudoflavor_to_service(gm, flav)); - ADJUST_ARGS(); - gss_mech_put(gm); + rpc_authflavor_t pf = flavs[i].pseudoflavor; + struct rpcsec_gss_info info; + + if (rpcauth_get_gssinfo(pf, &info) == 0) { + supported++; + p = xdr_reserve_space(xdr, 4 + 4 + + XDR_LEN(info.oid.len) + 4 + 4); + if (!p) + goto out; + *p++ = cpu_to_be32(RPC_AUTH_GSS); + p = xdr_encode_opaque(p, info.oid.data, info.oid.len); + *p++ = cpu_to_be32(info.qop); + *p++ = cpu_to_be32(info.service); + } else if (pf < RPC_AUTH_MAXFLAVOR) { + supported++; + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out; + *p++ = cpu_to_be32(pf); } else { - RESERVE_SPACE(4); - WRITE32(flav); - ADJUST_ARGS(); + if (report) + pr_warn("NFS: SECINFO: security flavor %u " + "is not supported\n", pf); } } + + if (nflavs != supported) + report = false; + *flavorsp = htonl(supported); + nfserr = 0; out: if (exp) exp_put(exp); + return nfserr; +} + +static __be32 +nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo *secinfo) +{ + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); +} + +static __be32 +nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_secinfo_no_name *secinfo) +{ + struct xdr_stream *xdr = &resp->xdr; + + return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); } /* * The SETATTR encode routine is special -- it always encodes a bitmap, * regardless of the error status. */ -static void +static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; - RESERVE_SPACE(12); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; if (nfserr) { - WRITE32(2); - WRITE32(0); - WRITE32(0); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } else { - WRITE32(2); - WRITE32(setattr->sa_bmval[0]); - WRITE32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(setattr->sa_bmval[0]); + *p++ = cpu_to_be32(setattr->sa_bmval[1]); + *p++ = cpu_to_be32(setattr->sa_bmval[2]); } - ADJUST_ARGS(); + return nfserr; } -static void +static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + sizeof(nfs4_verifier)); - WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier)); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8); + p = xdr_encode_opaque_fixed(p, &scd->se_confirm, + NFS4_VERIFIER_SIZE); } else if (nfserr == nfserr_clid_inuse) { - RESERVE_SPACE(8); - WRITE32(0); - WRITE32(0); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(0); } + return nfserr; } -static void +static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; if (!nfserr) { - RESERVE_SPACE(16); - WRITE32(write->wr_bytes_written); - WRITE32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, 8); - ADJUST_ARGS(); + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); } + return nfserr; } -void -nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + +static __be32 +nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_exchange_id *exid) { - __be32 *statp; - ENCODE_HEAD; + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + char *major_id; + char *server_scope; + int major_id_sz; + int server_scope_sz; + uint64_t minor_id = 0; - RESERVE_SPACE(8); - WRITE32(op->opnum); - statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); + if (nfserr) + return nfserr; - switch (op->opnum) { - case OP_ACCESS: - nfsd4_encode_access(resp, op->status, &op->u.access); - break; - case OP_CLOSE: - nfsd4_encode_close(resp, op->status, &op->u.close); - break; - case OP_COMMIT: - nfsd4_encode_commit(resp, op->status, &op->u.commit); - break; - case OP_CREATE: - nfsd4_encode_create(resp, op->status, &op->u.create); - break; - case OP_DELEGRETURN: - break; - case OP_GETATTR: - op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr); - break; - case OP_GETFH: - nfsd4_encode_getfh(resp, op->status, op->u.getfh); - break; - case OP_LINK: - nfsd4_encode_link(resp, op->status, &op->u.link); - break; - case OP_LOCK: - nfsd4_encode_lock(resp, op->status, &op->u.lock); - break; - case OP_LOCKT: - nfsd4_encode_lockt(resp, op->status, &op->u.lockt); - break; - case OP_LOCKU: - nfsd4_encode_locku(resp, op->status, &op->u.locku); - break; - case OP_LOOKUP: - break; - case OP_LOOKUPP: - break; - case OP_NVERIFY: - break; - case OP_OPEN: - nfsd4_encode_open(resp, op->status, &op->u.open); - break; - case OP_OPEN_CONFIRM: - nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm); - break; - case OP_OPEN_DOWNGRADE: - nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade); - break; - case OP_PUTFH: - break; - case OP_PUTROOTFH: - break; - case OP_READ: - op->status = nfsd4_encode_read(resp, op->status, &op->u.read); - break; - case OP_READDIR: - op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir); - break; - case OP_READLINK: - op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink); - break; - case OP_REMOVE: - nfsd4_encode_remove(resp, op->status, &op->u.remove); - break; - case OP_RENAME: - nfsd4_encode_rename(resp, op->status, &op->u.rename); - break; - case OP_RENEW: - break; - case OP_RESTOREFH: - break; - case OP_SAVEFH: - break; - case OP_SECINFO: - nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo); - break; - case OP_SETATTR: - nfsd4_encode_setattr(resp, op->status, &op->u.setattr); - break; - case OP_SETCLIENTID: - nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - break; - case OP_VERIFY: - break; - case OP_WRITE: - nfsd4_encode_write(resp, op->status, &op->u.write); + major_id = utsname()->nodename; + major_id_sz = strlen(major_id); + server_scope = utsname()->nodename; + server_scope_sz = strlen(server_scope); + + p = xdr_reserve_space(xdr, + 8 /* eir_clientid */ + + 4 /* eir_sequenceid */ + + 4 /* eir_flags */ + + 4 /* spr_how */); + if (!p) + return nfserr_resource; + + p = xdr_encode_opaque_fixed(p, &exid->clientid, 8); + *p++ = cpu_to_be32(exid->seqid); + *p++ = cpu_to_be32(exid->flags); + + *p++ = cpu_to_be32(exid->spa_how); + + switch (exid->spa_how) { + case SP4_NONE: break; - case OP_RELEASE_LOCKOWNER: + case SP4_MACH_CRED: + /* spo_must_enforce, spo_must_allow */ + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + + /* spo_must_enforce bitmap: */ + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]); + *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + *p++ = cpu_to_be32(0); + break; default: - break; + WARN_ON_ONCE(1); } - /* - * Note: We write the status directly, instead of using WRITE32(), - * since it is already in network byte order. - */ - *statp = op->status; + p = xdr_reserve_space(xdr, + 8 /* so_minor_id */ + + 4 /* so_major_id.len */ + + (XDR_QUADLEN(major_id_sz) * 4) + + 4 /* eir_server_scope.len */ + + (XDR_QUADLEN(server_scope_sz) * 4) + + 4 /* eir_server_impl_id.count (0) */); + if (!p) + return nfserr_resource; + + /* The server_owner struct */ + p = xdr_encode_hyper(p, minor_id); /* Minor id */ + /* major id */ + p = xdr_encode_opaque(p, major_id, major_id_sz); + + /* Server scope */ + p = xdr_encode_opaque(p, server_scope, server_scope_sz); + + /* Implementation id */ + *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ + return 0; +} + +static __be32 +nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_create_session *sess) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 24); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, sess->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(sess->seqid); + *p++ = cpu_to_be32(sess->flags); + + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->fore_channel.maxops); + *p++ = cpu_to_be32(sess->fore_channel.maxreqs); + *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs); + + if (sess->fore_channel.nr_rdma_attrs) { + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs); + } + + p = xdr_reserve_space(xdr, 28); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(0); /* headerpadsz */ + *p++ = cpu_to_be32(sess->back_channel.maxreq_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_sz); + *p++ = cpu_to_be32(sess->back_channel.maxresp_cached); + *p++ = cpu_to_be32(sess->back_channel.maxops); + *p++ = cpu_to_be32(sess->back_channel.maxreqs); + *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs); + + if (sess->back_channel.nr_rdma_attrs) { + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(sess->back_channel.rdma_attrs); + } + return 0; +} + +static __be32 +nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_sequence *seq) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, seq->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(seq->seqid); + *p++ = cpu_to_be32(seq->slotid); + /* Note slotid's are numbered from zero: */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */ + *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */ + *p++ = cpu_to_be32(seq->status_flags); + + resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */ + return 0; +} + +static __be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_test_stateid *test_stateid) +{ + struct xdr_stream *xdr = &resp->xdr; + struct nfsd4_test_stateid_id *stateid, *next; + __be32 *p; + + if (nfserr) + return nfserr; + + p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); + if (!p) + return nfserr_resource; + *p++ = htonl(test_stateid->ts_num_ids); + + list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { + *p++ = stateid->ts_id_status; + } + + return nfserr; +} + +static __be32 +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) +{ + return nfserr; +} + +typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); + +/* + * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 + * since we don't need to filter out obsolete ops as this is + * done in the decoding phase. + */ +static nfsd4_enc nfsd4_enc_ops[] = { + [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, + [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, + [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, + [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, + [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, + [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, + [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, + [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, + [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt, + [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, + [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, + [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, + [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ] = (nfsd4_enc)nfsd4_encode_read, + [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, + [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, + [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, + [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, + [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, + [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, + [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, + [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, + + /* NFSv4.1 operations */ + [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, + [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, +}; + +/* + * Calculate whether we still have space to encode repsize bytes. + * There are two considerations: + * - For NFS versions >=4.1, the size of the reply must stay within + * session limits + * - For all NFS versions, we must stay within limited preallocated + * buffer space. + * + * This is called before the operation is processed, so can only provide + * an upper estimate. For some nonidempotent operations (such as + * getattr), it's not necessarily a problem if that estimate is wrong, + * as we can fail it after processing without significant side effects. + */ +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) +{ + struct xdr_buf *buf = &resp->rqstp->rq_res; + struct nfsd4_slot *slot = resp->cstate.slot; + + if (buf->len + respsize <= buf->buflen) + return nfs_ok; + if (!nfsd4_has_session(&resp->cstate)) + return nfserr_resource; + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) { + WARN_ON_ONCE(1); + return nfserr_rep_too_big_to_cache; + } + return nfserr_rep_too_big; +} + +void +nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +{ + struct xdr_stream *xdr = &resp->xdr; + struct nfs4_stateowner *so = resp->cstate.replay_owner; + struct svc_rqst *rqstp = resp->rqstp; + int post_err_offset; + nfsd4_enc encoder; + __be32 *p; + + p = xdr_reserve_space(xdr, 8); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); + post_err_offset = xdr->buf->len; + + if (op->opnum == OP_ILLEGAL) + goto status; + BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + !nfsd4_enc_ops[op->opnum]); + encoder = nfsd4_enc_ops[op->opnum]; + op->status = encoder(resp, op->status, &op->u); + xdr_commit_encode(xdr); + + /* nfsd4_check_resp_size guarantees enough room for error status */ + if (!op->status) { + int space_needed = 0; + if (!nfsd4_last_compound_op(rqstp)) + space_needed = COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, space_needed); + } + if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { + struct nfsd4_slot *slot = resp->cstate.slot; + + if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) + op->status = nfserr_rep_too_big_to_cache; + else + op->status = nfserr_rep_too_big; + } + if (op->status == nfserr_resource || + op->status == nfserr_rep_too_big || + op->status == nfserr_rep_too_big_to_cache) { + /* + * The operation may have already been encoded or + * partially encoded. No op returns anything additional + * in the case of one of these three errors, so we can + * just truncate back to after the status. But it's a + * bug if we had to do this on a non-idempotent op: + */ + warn_on_nonidempotent_op(op); + xdr_truncate_encode(xdr, post_err_offset); + } + if (so) { + int len = xdr->buf->len - post_err_offset; + + so->so_replay.rp_status = op->status; + so->so_replay.rp_buflen = len; + read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + so->so_replay.rp_buf, len); + } +status: + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* @@ -2719,35 +3941,35 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * called with nfs4_lock_state() held */ void -nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) { - ENCODE_HEAD; + __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); - RESERVE_SPACE(8); - WRITE32(op->opnum); + p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); + if (!p) { + WARN_ON_ONCE(1); + return; + } + *p++ = cpu_to_be32(op->opnum); *p++ = rp->rp_status; /* already xdr'ed */ - ADJUST_ARGS(); - RESERVE_SPACE(rp->rp_buflen); - WRITEMEM(rp->rp_buf, rp->rp_buflen); - ADJUST_ARGS(); + p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } -/* - * END OF "GENERIC" ENCODE ROUTINES. - */ - int nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } -void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) { + struct svc_rqst *rqstp = rq; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + if (args->ops != args->iops) { kfree(args->ops); args->ops = args->iops; @@ -2760,13 +3982,18 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) tb->release(tb->buf); kfree(tb); } + return 1; } int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { - __be32 status; - + if (rqstp->rq_arg.head[0].iov_len % 4) { + /* client is nuts */ + dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", + __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); + return 0; + } args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; @@ -2776,11 +4003,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_comp args->ops = args->iops; args->rqstp = rqstp; - status = nfsd4_decode_compound(args); - if (status) { - nfsd4_release_compoundargs(args); - } - return !status; + return !nfsd4_decode_compound(args); } int @@ -2789,19 +4012,33 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... */ - struct kvec *iov; + struct nfsd4_compound_state *cs = &resp->cstate; + struct xdr_buf *buf = resp->xdr.buf; + + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); + + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (rqstp->rq_res.page_len) - iov = &rqstp->rq_res.tail[0]; - else - iov = &rqstp->rq_res.head[0]; - iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; - BUG_ON(iov->iov_len > PAGE_SIZE); + if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; + } + /* Renew the clientid on success and on replay */ + spin_lock(&nn->client_lock); + nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); + } return 1; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5bfc2ac60d5..6040da8830f 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/nfscache.c - * * Request reply cache. This is currently a global cache, but this may * change in the future and be a per-client cache. * @@ -10,63 +8,172 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/kernel.h> -#include <linux/time.h> #include <linux/slab.h> -#include <linux/string.h> -#include <linux/spinlock.h> -#include <linux/list.h> - -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> - -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <linux/log2.h> +#include <linux/hash.h> +#include <net/checksum.h> + +#include "nfsd.h" +#include "cache.h" + +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + +/* + * We use this value to determine the number of hash buckets from the max + * cache size, the idea being that when the cache is at its maximum number + * of entries, then this should be the average number of entries per bucket. */ -#define CACHESIZE 1024 -#define HASHSIZE 64 -#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1)) +#define TARGET_BUCKET_SIZE 64 -static struct hlist_head * hash_list; +static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; + +/* max number of entries allowed in the cache */ +static unsigned int max_drc_entries; + +/* number of significant bits in the hash value */ +static unsigned int maskbits; + +/* + * Stats and other tracking of on the duplicate reply cache. All of these and + * the "rc" fields in nfsdstats are protected by the cache_lock + */ + +/* total number of entries */ +static unsigned int num_drc_entries; + +/* cache misses due only to checksum comparison failures */ +static unsigned int payload_misses; + +/* amount of memory (in bytes) currently consumed by the DRC */ +static unsigned int drc_mem_usage; + +/* longest hash chain seen */ +static unsigned int longest_chain; + +/* size of cache when we saw the longest hash chain */ +static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); + +static struct shrinker nfsd_reply_cache_shrinker = { + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, + .seeks = 1, +}; -/* +/* * locking for the reply cache: * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +/* + * Compute the number of hash buckets we need. Divide the max cachesize by + * the "target" max bucket size, and round up to next power of two. + */ +static unsigned int +nfsd_hashsize(unsigned int limit) +{ + return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} - hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); - if (!hash_list) +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { + drc_mem_usage -= rp->c_replvec.iov_len; + kfree(rp->c_replvec.iov_base); + } + if (!hlist_unhashed(&rp->c_hash)) + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + drc_mem_usage -= sizeof(*rp); + kmem_cache_free(drc_slab, rp); +} + +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + unsigned int hashsize; + + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + hashsize = nfsd_hashsize(max_drc_entries); + maskbits = ilog2(hashsize); + + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; + + cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); + if (!cache_hash) goto out_nomem; - cache_disabled = 0; return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -78,27 +185,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; + kfree (cache_hash); + cache_hash = NULL; - kfree (hash_list); - hash_list = NULL; + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -108,88 +221,236 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + long freed = 0; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) + break; + nfsd_reply_cache_free_locked(rp); + freed++; + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long num; + + spin_lock(&cache_lock); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned long freed; + + spin_lock(&cache_lock); + freed = prune_cache_entries(); + spin_unlock(&cache_lock); + return freed; +} +/* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +static bool +nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) +{ + /* Check RPC header info first */ + if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + + /* compare checksum of NFS data */ + if (csum != rp->c_csum) { + ++payload_misses; + return false; + } + + return true; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp, *ret = NULL; + struct hlist_head *rh; + unsigned int entries = 0; + + rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + hlist_for_each_entry(rp, rh, c_hash) { + ++entries; + if (nfsd_cache_match(rqstp, csum, rp)) { + ret = rp; + break; + } + } + + /* tally hash chain length stats */ + if (entries > longest_chain) { + longest_chain = entries; + longest_chain_cachesize = num_drc_entries; + } else if (entries == longest_chain) { + /* prefer to keep the smallest cachesize possible here */ + longest_chain_cachesize = min(longest_chain_cachesize, + num_drc_entries); + } + + return ret; } /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int -nfsd_cache_lookup(struct svc_rqst *rqstp, int type) +nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_node *hn; - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; - int rtn; + int type = rqstp->rq_cachetype; + int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return RC_DOIT; + return rtn; } - spin_lock(&cache_lock); - rtn = RC_DOIT; + csum = nfsd_cache_csum(rqstp); - rh = &hash_list[REQHASH(xid)]; - hlist_for_each_entry(rp, hn, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; - } + /* + * Since the common case is a cache miss followed by an insert, + * preallocate an entry. + */ + rp = nfsd_reply_cache_alloc(); + spin_lock(&cache_lock); + if (likely(rp)) { + ++num_drc_entries; + drc_mem_usage += sizeof(*rp); } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } - } - } + /* go ahead and prune the cache */ + prune_cache_entries(); - /* This should not happen */ - if (rp == NULL) { - static int complaints; + found = nfsd_cache_search(rqstp, csum); + if (found) { + if (likely(rp)) + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; + } - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); goto out; } + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { + drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); rp->c_replvec.iov_base = NULL; } @@ -199,9 +460,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -230,7 +491,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -255,19 +516,20 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; + size_t bufsize = 0; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; - + /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -279,23 +541,25 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) break; case RC_REPLBUFF: cachv = &rp->c_replvec; - cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + bufsize = len << 2; + cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } - cachv->iov_len = len << 2; - memcpy(cachv->iov_base, statp, len << 2); + cachv->iov_len = bufsize; + memcpy(cachv->iov_base, statp, bufsize); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); + drc_mem_usage += bufsize; lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } @@ -319,3 +583,30 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) vec->iov_len += data->iov_len; return 1; } + +/* + * Note that fields may be added, removed or reordered in the future. Programs + * scraping this file for info should test the labels to ensure they're + * getting the correct field. + */ +static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +{ + spin_lock(&cache_lock); + seq_printf(m, "max entries: %u\n", max_drc_entries); + seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "hash buckets: %u\n", 1 << maskbits); + seq_printf(m, "mem usage: %u\n", drc_mem_usage); + seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); + seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); + seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); + seq_printf(m, "payload misses: %u\n", payload_misses); + seq_printf(m, "longest chain len: %u\n", longest_chain); + seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + spin_unlock(&cache_lock); + return 0; +} + +int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_reply_cache_stats_show, NULL); +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 8516137cdbb..51844048937 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1,68 +1,52 @@ /* - * linux/fs/nfsd/nfsctl.c - * * Syscall interface to knfsd. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/module.h> - -#include <linux/linkage.h> -#include <linux/time.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/in.h> -#include <linux/syscalls.h> -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/smp_lock.h> +#include <linux/namei.h> #include <linux/ctype.h> -#include <linux/nfs.h> -#include <linux/nfsd_idmap.h> -#include <linux/lockd/bind.h> -#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> +#include <linux/lockd/lockd.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/gss_krb5_enctypes.h> +#include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/module.h> -#include <asm/uaccess.h> +#include "idmap.h" +#include "nfsd.h" +#include "cache.h" +#include "state.h" +#include "netns.h" /* - * We have a single directory with 9 nodes in it. + * We have a single directory with several nodes in it. */ enum { NFSD_Root = 1, - NFSD_Svc, - NFSD_Add, - NFSD_Del, - NFSD_Export, - NFSD_Unexport, - NFSD_Getfd, - NFSD_Getfs, NFSD_List, + NFSD_Export_features, NFSD_Fh, + NFSD_FO_UnlockIP, + NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, + NFSD_Pool_Stats, + NFSD_Reply_Cache_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, + NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops */ #ifdef CONFIG_NFSD_V4 NFSD_Leasetime, + NFSD_Gracetime, NFSD_RecoveryDir, #endif }; @@ -70,14 +54,9 @@ enum { /* * write() for these nodes. */ -static ssize_t write_svc(struct file *file, char *buf, size_t size); -static ssize_t write_add(struct file *file, char *buf, size_t size); -static ssize_t write_del(struct file *file, char *buf, size_t size); -static ssize_t write_export(struct file *file, char *buf, size_t size); -static ssize_t write_unexport(struct file *file, char *buf, size_t size); -static ssize_t write_getfd(struct file *file, char *buf, size_t size); -static ssize_t write_getfs(struct file *file, char *buf, size_t size); static ssize_t write_filehandle(struct file *file, char *buf, size_t size); +static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); +static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); static ssize_t write_threads(struct file *file, char *buf, size_t size); static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); @@ -85,18 +64,14 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); +static ssize_t write_gracetime(struct file *file, char *buf, size_t size); static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { - [NFSD_Svc] = write_svc, - [NFSD_Add] = write_add, - [NFSD_Del] = write_del, - [NFSD_Export] = write_export, - [NFSD_Unexport] = write_unexport, - [NFSD_Getfd] = write_getfd, - [NFSD_Getfs] = write_getfs, [NFSD_Fh] = write_filehandle, + [NFSD_FO_UnlockIP] = write_unlock_ip, + [NFSD_FO_UnlockFS] = write_unlock_fs, [NFSD_Threads] = write_threads, [NFSD_Pool_Threads] = write_pool_threads, [NFSD_Versions] = write_versions, @@ -104,13 +79,14 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_MaxBlkSize] = write_maxblksize, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, + [NFSD_Gracetime] = write_gracetime, [NFSD_RecoveryDir] = write_recoverydir, #endif }; static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) { - ino_t ino = file->f_path.dentry->d_inode->i_ino; + ino_t ino = file_inode(file)->i_ino; char *data; ssize_t rv; @@ -147,162 +123,228 @@ static const struct file_operations transaction_ops = { .write = nfsctl_transaction_write, .read = nfsctl_transaction_read, .release = simple_transaction_release, + .llseek = default_llseek, }; -extern struct seq_operations nfs_exports_op; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) +{ + int err; + struct seq_file *seq; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + err = seq_open(file, &nfs_exports_op); + if (err) + return err; + + seq = file->private_data; + seq->private = nn->svc_export_cache; + return 0; +} + +static int exports_proc_open(struct inode *inode, struct file *file) { - return seq_open(file, &nfs_exports_op); + return exports_net_open(current->nsproxy->net_ns, file); } -static const struct file_operations exports_operations = { - .open = exports_open, +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; -/*----------------------------------------------------------------------------*/ -/* - * payload - write methods - * If the method has a response, the response should be put in buf, - * and the length returned. Otherwise return 0 or and -error. - */ +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; -static ssize_t write_svc(struct file *file, char *buf, size_t size) +static int export_features_show(struct seq_file *m, void *v) { - struct nfsctl_svc *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_svc*) buf; - return nfsd_svc(data->svc_port, data->svc_nthreads); + seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS); + return 0; } -static ssize_t write_add(struct file *file, char *buf, size_t size) +static int export_features_open(struct inode *inode, struct file *file) { - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_addclient(data); + return single_open(file, export_features_show, NULL); } -static ssize_t write_del(struct file *file, char *buf, size_t size) +static const struct file_operations export_features_operations = { + .open = export_features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) +static int supported_enctypes_show(struct seq_file *m, void *v) { - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_delclient(data); + seq_printf(m, KRB5_SUPPORTED_ENCTYPES); + return 0; } -static ssize_t write_export(struct file *file, char *buf, size_t size) +static int supported_enctypes_open(struct inode *inode, struct file *file) { - struct nfsctl_export *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_export(data); + return single_open(file, supported_enctypes_show, NULL); } -static ssize_t write_unexport(struct file *file, char *buf, size_t size) +static const struct file_operations supported_enctypes_ops = { + .open = supported_enctypes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ + +static const struct file_operations pool_stats_operations = { + .open = nfsd_pool_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = nfsd_pool_stats_release, + .owner = THIS_MODULE, +}; + +static struct file_operations reply_cache_stats_operations = { + .open = nfsd_reply_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/*----------------------------------------------------------------------------*/ +/* + * payload - write methods + */ + + +/** + * write_unlock_ip - Release all locks used by a client + * + * Experimental. + * + * Input: + * buf: '\n'-terminated C string containing a + * presentation format IP address + * size: length of C string in @buf + * Output: + * On success: returns zero if all specified locks were released; + * returns one if one or more locks were not released + * On error: return code is negative errno value + */ +static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) { - struct nfsctl_export *data; + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + size_t salen = sizeof(address); + char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; - if (size < sizeof(*data)) + /* sanity check */ + if (size == 0) return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_unexport(data); -} -static ssize_t write_getfs(struct file *file, char *buf, size_t size) -{ - struct nfsctl_fsparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh *res; + if (buf[size-1] != '\n') + return -EINVAL; - if (size < sizeof(*data)) + fo_path = buf; + if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - data = (struct nfsctl_fsparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - sin = (struct sockaddr_in *)&data->gd_addr; - if (data->gd_maxlen > NFS3_FHSIZE) - data->gd_maxlen = NFS3_FHSIZE; - - res = (struct knfsd_fh*)buf; - - exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); - auth_domain_put(clp); - } - exp_readunlock(); - if (err == 0) - err = res->fh_size + offsetof(struct knfsd_fh, fh_base); - out: - return err; + + if (rpc_pton(net, fo_path, size, sap, salen) == 0) + return -EINVAL; + + return nlmsvc_unlock_all_by_ip(sap); } -static ssize_t write_getfd(struct file *file, char *buf, size_t size) +/** + * write_unlock_fs - Release all locks on a local file system + * + * Experimental. + * + * Input: + * buf: '\n'-terminated C string containing the + * absolute pathname of a local file system + * size: length of C string in @buf + * Output: + * On success: returns zero if all specified locks were released; + * returns one if one or more locks were not released + * On error: return code is negative errno value + */ +static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) { - struct nfsctl_fdparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh fh; - char *res; + struct path path; + char *fo_path; + int error; - if (size < sizeof(*data)) + /* sanity check */ + if (size == 0) return -EINVAL; - data = (struct nfsctl_fdparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - err = -EINVAL; - if (data->gd_version < 2 || data->gd_version > NFSSVC_MAXVERS) - goto out; - - res = buf; - sin = (struct sockaddr_in *)&data->gd_addr; - exp_readlock(); - if (!(clp = auth_unix_lookup(sin->sin_addr))) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); - auth_domain_put(clp); - } - exp_readunlock(); - if (err == 0) { - memset(res,0, NFS_FHSIZE); - memcpy(res, &fh.fh_base, fh.fh_size); - err = NFS_FHSIZE; - } - out: - return err; + if (buf[size-1] != '\n') + return -EINVAL; + + fo_path = buf; + if (qword_get(&buf, fo_path, size) < 0) + return -EINVAL; + + error = kern_path(fo_path, 0, &path); + if (error) + return error; + + /* + * XXX: Needs better sanity checking. Otherwise we could end up + * releasing locks on the wrong file system. + * + * For example: + * 1. Does the path refer to a directory? + * 2. Is that directory a mount point, or + * 3. Is that directory the root of an exported file system? + */ + error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); + + path_put(&path); + return error; } +/** + * write_filehandle - Get a variable-length NFS file handle by path + * + * On input, the buffer contains a '\n'-terminated C string comprised of + * three alphanumeric words separated by whitespace. The string may + * contain escape sequences. + * + * Input: + * buf: + * domain: client domain name + * path: export pathname + * maxsize: numeric maximum size of + * @buf + * size: length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing a ASCII hex text version + * of the NFS file handle; + * return code is the size in bytes of the string + * On error: return code is negative errno value + */ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) { - /* request is: - * domain path maxsize - * response is - * filehandle - * - * qword quoting is used, so filehandle will be \x.... - */ char *dname, *path; int uninitialized_var(maxsize); char *mesg = buf; int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -313,11 +355,13 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) dname = mesg; len = qword_get(&mesg, dname, size); - if (len <= 0) return -EINVAL; + if (len <= 0) + return -EINVAL; path = dname+len+1; len = qword_get(&mesg, path, size); - if (len <= 0) return -EINVAL; + if (len <= 0) + return -EINVAL; len = get_int(&mesg, &maxsize); if (len) @@ -336,45 +380,90 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; - mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; + mesg = buf; + len = SIMPLE_TRANSACTION_LIMIT; qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); mesg[-1] = '\n'; return mesg - buf; } -extern int nfsd_nrthreads(void); - +/** + * write_threads - Start NFSD, or report the current number of running threads + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string numeric value representing the number of + * running NFSD threads; + * return code is the size in bytes of the string + * On error: return code is zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the + * number of NFSD threads to start + * size: non-zero length of C string in @buf + * Output: + * On success: NFS service is started; + * passed-in buffer filled with '\n'-terminated C + * string numeric value representing the number of + * running NFSD threads; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ static ssize_t write_threads(struct file *file, char *buf, size_t size) { - /* if size > 0, look for a number of threads and call nfsd_svc - * then write out number of threads as reply - */ char *mesg = buf; int rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; + if (size > 0) { int newthreads; rv = get_int(&mesg, &newthreads); if (rv) return rv; - if (newthreads <0) + if (newthreads < 0) return -EINVAL; - rv = nfsd_svc(2049, newthreads); - if (rv) + rv = nfsd_svc(newthreads, net); + if (rv < 0) return rv; - } - sprintf(buf, "%d\n", nfsd_nrthreads()); - return strlen(buf); -} + } else + rv = nfsd_nrthreads(net); -extern int nfsd_nrpools(void); -extern int nfsd_get_nrthreads(int n, int *); -extern int nfsd_set_nrthreads(int n, int *); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); +} +/** + * write_pool_threads - Set or report the current number of threads per pool + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing whitespace- + * separated unsigned integer values + * representing the number of NFSD + * threads to start in each pool + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing integer values representing the + * number of NFSD threads in each pool; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) { /* if size > 0, look for an array of number of threads per node @@ -384,22 +473,27 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int i; int rv; int len; - int npools = nfsd_nrpools(); + int npools; int *nthreads; + struct net *net = file->f_dentry->d_sb->s_fs_info; + mutex_lock(&nfsd_mutex); + npools = nfsd_nrpools(net); if (npools == 0) { /* * NFS is shut down. The admin can start it by * writing to the threads file but NOT the pool_threads * file, sorry. Report zero threads. */ + mutex_unlock(&nfsd_mutex); strcpy(buf, "0\n"); return strlen(buf); } nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); + rv = -ENOMEM; if (nthreads == NULL) - return -ENOMEM; + goto out_free; if (size > 0) { for (i = 0; i < npools; i++) { @@ -412,12 +506,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) if (nthreads[i] < 0) goto out_free; } - rv = nfsd_set_nrthreads(i, nthreads); + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; } - rv = nfsd_get_nrthreads(npools, nthreads); + rv = nfsd_get_nrthreads(npools, nthreads, net); if (rv) goto out_free; @@ -429,30 +523,28 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) size -= len; mesg += len; } - - return (mesg-buf); - + rv = mesg - buf; out_free: kfree(nthreads); + mutex_unlock(&nfsd_mutex); return rv; } -static ssize_t write_versions(struct file *file, char *buf, size_t size) +static ssize_t __write_versions(struct file *file, char *buf, size_t size) { - /* - * Format: - * [-/+]vers [-/+]vers ... - */ char *mesg = buf; - char *vers, sign; - int len, num; + char *vers, *minorp, sign; + int len, num, remaining; + unsigned minor; ssize_t tlen = 0; char *sep; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { - if (nfsd_serv) + if (nn->nfsd_serv) /* Cannot change versions without updating - * nfsd_serv->sv_xdrsize, and reallocing + * nn->nfsd_serv->sv_xdrsize, and reallocing * rq_argp and rq_resp */ return -EBUSY; @@ -466,9 +558,20 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) do { sign = *vers; if (sign == '+' || sign == '-') - num = simple_strtol((vers+1), NULL, 0); + num = simple_strtol((vers+1), &minorp, 0); else - num = simple_strtol(vers, NULL, 0); + num = simple_strtol(vers, &minorp, 0); + if (*minorp == '.') { + if (num != 4) + return -EINVAL; + minor = simple_strtoul(minorp+1, NULL, 0); + if (minor == 0) + return -EINVAL; + if (nfsd_minorversion(minor, sign == '-' ? + NFSD_CLEAR : NFSD_SET) < 0) + return -EINVAL; + goto next; + } switch(num) { case 2: case 3: @@ -478,140 +581,288 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) default: return -EINVAL; } + next: vers += len + 1; - tlen += len; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as * having no versions is BAD */ nfsd_reset_versions(); } + /* Now write current state into reply buffer */ len = 0; sep = ""; + remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) if (nfsd_vers(num, NFSD_AVAIL)) { - len += sprintf(buf+len, "%s%c%d", sep, + len = snprintf(buf, remaining, "%s%c%d", sep, nfsd_vers(num, NFSD_TEST)?'+':'-', num); sep = " "; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; } - len += sprintf(buf+len, "\n"); - return len; + if (nfsd_vers(4, NFSD_AVAIL)) + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; + minor++) { + len = snprintf(buf, remaining, " %c4.%u", + (nfsd_vers(4, NFSD_TEST) && + nfsd_minorversion(minor, NFSD_TEST)) ? + '+' : '-', + minor); + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; + } + + len = snprintf(buf, remaining, "\n"); + if (len > remaining) + return -EINVAL; + return tlen + len; } -static ssize_t write_ports(struct file *file, char *buf, size_t size) +/** + * write_versions - Set or report the available NFS protocol versions + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing positive or negative integer + * values representing the current status of each + * protocol version; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + * + * OR + * + * Input: + * buf: C string containing whitespace- + * separated positive or negative + * integer values representing NFS + * protocol versions to enable ("+n") + * or disable ("-n") + * size: non-zero length of C string in @buf + * Output: + * On success: status of zero or more protocol versions has + * been updated; passed-in buffer filled with + * '\n'-terminated C string containing positive + * or negative integer values representing the + * current status of each protocol version; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_versions(struct file *file, char *buf, size_t size) { - if (size == 0) { - int len = 0; - lock_kernel(); - if (nfsd_serv) - len = svc_xprt_names(nfsd_serv, buf, 0); - unlock_kernel(); - return len; - } - /* Either a single 'fd' number is written, in which - * case it must be for a socket of a supported family/protocol, - * and we use it as an nfsd socket, or - * A '-' followed by the 'name' of a socket in which case - * we close the socket. - */ - if (isdigit(buf[0])) { - char *mesg = buf; - int fd; - int err; - err = get_int(&mesg, &fd); - if (err) - return -EINVAL; - if (fd < 0) - return -EINVAL; - err = nfsd_create_serv(); - if (!err) { - int proto = 0; - err = svc_addsock(nfsd_serv, fd, buf, &proto); - if (err >= 0) { - err = lockd_up(proto); - if (err < 0) - svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); - } - /* Decrease the count, but don't shutdown the - * the service - */ - lock_kernel(); - nfsd_serv->sv_nrthreads--; - unlock_kernel(); - } - return err < 0 ? err : 0; - } - if (buf[0] == '-' && isdigit(buf[1])) { - char *toclose = kstrdup(buf+1, GFP_KERNEL); - int len = 0; - if (!toclose) - return -ENOMEM; - lock_kernel(); - if (nfsd_serv) - len = svc_sock_names(buf, nfsd_serv, toclose); - unlock_kernel(); - if (len >= 0) - lockd_down(); - kfree(toclose); - return len; + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_versions(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +/* + * Zero-length write. Return a list of NFSD's current listener + * transports. + */ +static ssize_t __write_ports_names(char *buf, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) + return 0; + return svc_xprt_names(nn->nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); +} + +/* + * A single 'fd' number was written, in which case it must be for + * a socket of a supported family/protocol, and we use it as an + * nfsd listener. + */ +static ssize_t __write_ports_addfd(char *buf, struct net *net) +{ + char *mesg = buf; + int fd, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + err = get_int(&mesg, &fd); + if (err != 0 || fd < 0) + return -EINVAL; + + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; } - /* - * Add a transport listener by writing it's transport name - */ - if (isalpha(buf[0])) { - int err; - char transport[16]; - int port; - if (sscanf(buf, "%15s %4d", transport, &port) == 2) { - err = nfsd_create_serv(); - if (!err) { - err = svc_create_xprt(nfsd_serv, - transport, port, - SVC_SOCK_ANONYMOUS); - if (err == -ENOENT) - /* Give a reasonable perror msg for - * bad transport string */ - err = -EPROTONOSUPPORT; - } - return err < 0 ? err : 0; - } + + err = nfsd_create_serv(net); + if (err != 0) + return err; + + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + if (err < 0) { + nfsd_destroy(net); + return err; } - /* - * Remove a transport by writing it's transport name and port number - */ - if (buf[0] == '-' && isalpha(buf[1])) { - struct svc_xprt *xprt; - int err = -EINVAL; - char transport[16]; - int port; - if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { - if (port == 0) - return -EINVAL; - lock_kernel(); - if (nfsd_serv) { - xprt = svc_find_xprt(nfsd_serv, transport, - AF_UNSPEC, port); - if (xprt) { - svc_close_xprt(xprt); - svc_xprt_put(xprt); - err = 0; - } else - err = -ENOTCONN; - } - unlock_kernel(); - return err < 0 ? err : 0; - } + + /* Decrease the count, but don't shut down the service */ + nn->nfsd_serv->sv_nrthreads--; + return err; +} + +/* + * A transport listener is added by writing it's transport name and + * a port number. + */ +static ssize_t __write_ports_addxprt(char *buf, struct net *net) +{ + char transport[16]; + struct svc_xprt *xprt; + int port, err; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (sscanf(buf, "%15s %5u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHRT_MAX) + return -EINVAL; + + err = nfsd_create_serv(net); + if (err != 0) + return err; + + err = svc_create_xprt(nn->nfsd_serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS); + if (err < 0) + goto out_err; + + err = svc_create_xprt(nn->nfsd_serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS); + if (err < 0 && err != -EAFNOSUPPORT) + goto out_close; + + /* Decrease the count, but don't shut down the service */ + nn->nfsd_serv->sv_nrthreads--; + return 0; +out_close: + xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); + if (xprt != NULL) { + svc_close_xprt(xprt); + svc_xprt_put(xprt); } +out_err: + nfsd_destroy(net); + return err; +} + +static ssize_t __write_ports(struct file *file, char *buf, size_t size, + struct net *net) +{ + if (size == 0) + return __write_ports_names(buf, net); + + if (isdigit(buf[0])) + return __write_ports_addfd(buf, net); + + if (isalpha(buf[0])) + return __write_ports_addxprt(buf, net); + return -EINVAL; } +/** + * write_ports - Pass a socket file descriptor or transport name to listen on + * + * Input: + * buf: ignored + * size: zero + * Output: + * On success: passed-in buffer filled with a '\n'-terminated C + * string containing a whitespace-separated list of + * named NFSD listeners; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing a bound + * but unconnected socket that is to be + * used as an NFSD listener; listen(3) + * must be called for a SOCK_STREAM + * socket, otherwise it is ignored + * size: non-zero length of C string in @buf + * Output: + * On success: NFS service is started; + * passed-in buffer filled with a '\n'-terminated C + * string containing a unique alphanumeric name of + * the listener; + * return code is the size in bytes of the string + * On error: return code is a negative errno value + * + * OR + * + * Input: + * buf: C string containing a transport + * name and an unsigned integer value + * representing the port to listen on, + * separated by whitespace + * size: non-zero length of C string in @buf + * Output: + * On success: returns zero; NFS service is started + * On error: return code is a negative errno value + */ +static ssize_t write_ports(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; + + mutex_lock(&nfsd_mutex); + rv = __write_ports(file, buf, size, net); + mutex_unlock(&nfsd_mutex); + return rv; +} + + int nfsd_max_blksize; +/** + * write_maxblksize - Set or report the current NFS blksize + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * NFS blksize + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing numeric value of the current NFS blksize + * setting; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (size > 0) { int bsize; int rv = get_int(&mesg, &bsize); @@ -625,59 +876,169 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) if (bsize > NFSSVC_MAXBLKSIZE) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); - lock_kernel(); - if (nfsd_serv && nfsd_serv->sv_nrthreads) { - unlock_kernel(); + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv) { + mutex_unlock(&nfsd_mutex); return -EBUSY; } nfsd_max_blksize = bsize; - unlock_kernel(); + mutex_unlock(&nfsd_mutex); } - return sprintf(buf, "%d\n", nfsd_max_blksize); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", + nfsd_max_blksize); } #ifdef CONFIG_NFSD_V4 -extern time_t nfs4_leasetime(void); - -static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) { - /* if size > 10 seconds, call - * nfs4_reset_lease() then write out the new lease (seconds) as reply - */ char *mesg = buf; - int rv; + int rv, i; if (size > 0) { - int lease; - rv = get_int(&mesg, &lease); + if (nn->nfsd_serv) + return -EBUSY; + rv = get_int(&mesg, &i); if (rv) return rv; - if (lease < 10 || lease > 3600) + /* + * Some sanity checking. We don't have a reason for + * these particular numbers, but problems with the + * extremes are: + * - Too short: the briefest network outage may + * cause clients to lose all their locks. Also, + * the frequent polling may be wasteful. + * - Too long: do you really want reboot recovery + * to take more than an hour? Or to make other + * clients wait an hour before being able to + * revoke a dead client's locks? + */ + if (i < 10 || i > 3600) return -EINVAL; - nfs4_reset_lease(lease); + *time = i; } - sprintf(buf, "%ld\n", nfs4_lease_time()); - return strlen(buf); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); } -static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, + time_t *time, struct nfsd_net *nn) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __nfsd4_write_time(file, buf, size, time, nn); + mutex_unlock(&nfsd_mutex); + return rv; +} + +/** + * write_leasetime - Set or report the current NFSv4 lease time + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * NFSv4 lease expiry time + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C + * string containing unsigned integer value of the + * current lease expiry time; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); +} + +/** + * write_gracetime - Set or report current NFSv4 grace period time + * + * As above, but sets the time of the NFSv4 grace period. + * + * Note this should never be set to less than the *previous* + * lease-period time, but we don't try to enforce this. (In the common + * case (a new boot), we don't know what the previous lease time was + * anyway.) + */ +static ssize_t write_gracetime(struct file *file, char *buf, size_t size) +{ + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); +} + +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, + struct nfsd_net *nn) { char *mesg = buf; char *recdir; int len, status; - if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') - return -EINVAL; - buf[size-1] = 0; + if (size > 0) { + if (nn->nfsd_serv) + return -EBUSY; + if (size > PATH_MAX || buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; - recdir = mesg; - len = qword_get(&mesg, recdir, size); - if (len <= 0) - return -EINVAL; + recdir = mesg; + len = qword_get(&mesg, recdir, size); + if (len <= 0) + return -EINVAL; + + status = nfs4_reset_recoverydir(recdir); + if (status) + return status; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", + nfs4_recoverydir()); +} + +/** + * write_recoverydir - Set or report the pathname of the recovery directory + * + * Input: + * buf: ignored + * size: zero + * + * OR + * + * Input: + * buf: C string containing the pathname + * of the directory on a local file + * system containing permanent NFSv4 + * recovery data + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing the current recovery pathname setting; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - status = nfs4_reset_recoverydir(recdir); - return strlen(buf); + mutex_lock(&nfsd_mutex); + rv = __write_recoverydir(file, buf, size, nn); + mutex_unlock(&nfsd_mutex); + return rv; } + #endif /*----------------------------------------------------------------------------*/ @@ -688,41 +1049,62 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, - [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, - [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, - [NFSD_Export] = {".export", &transaction_ops, S_IWUSR}, - [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, - [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, - [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, + [NFSD_Export_features] = {"export_features", + &export_features_operations, S_IRUGO}, + [NFSD_FO_UnlockIP] = {"unlock_ip", + &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_FO_UnlockFS] = {"unlock_filesystem", + &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, +#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, +#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } -static int nfsd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *nfsd_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", - .get_sb = nfsd_get_sb, - .kill_sb = kill_litter_super, + .mount = nfsd_mount, + .kill_sb = nfsd_umount, }; +MODULE_ALIAS_FS("nfsd"); #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) @@ -732,10 +1114,12 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = create_proc_entry("fs/nfs/exports", 0, NULL); - if (!entry) + entry = proc_create("exports", 0, entry, + &exports_proc_operations); + if (!entry) { + remove_proc_entry("fs/nfs", NULL); return -ENOMEM; - entry->proc_fops = &exports_operations; + } return 0; } #else /* CONFIG_PROC_FS */ @@ -745,28 +1129,67 @@ static int create_proc_exports_entry(void) } #endif +int nfsd_net_id; + +static __net_init int nfsd_init_net(struct net *net) +{ + int retval; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + retval = nfsd_export_init(net); + if (retval) + goto out_export_error; + retval = nfsd_idmap_init(net); + if (retval) + goto out_idmap_error; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + return 0; + +out_idmap_error: + nfsd_export_shutdown(net); +out_export_error: + return retval; +} + +static __net_exit void nfsd_exit_net(struct net *net) +{ + nfsd_idmap_shutdown(net); + nfsd_export_shutdown(net); +} + +static struct pernet_operations nfsd_net_ops = { + .init = nfsd_init_net, + .exit = nfsd_exit_net, + .id = &nfsd_net_id, + .size = sizeof(struct nfsd_net), +}; + static int __init init_nfsd(void) { int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfs4_state_init(); /* nfs4 locking state */ + retval = register_cld_notifier(); if (retval) return retval; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) + goto out_unregister_notifier; + retval = nfsd4_init_slabs(); + if (retval) + goto out_unregister_pernet; + retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ + if (retval) + goto out_free_slabs; nfsd_stat_init(); /* Statistics */ retval = nfsd_reply_cache_init(); if (retval) goto out_free_stat; - retval = nfsd_export_init(); - if (retval) - goto out_free_cache; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = nfsd_idmap_init(); - if (retval) - goto out_free_lockd; retval = create_proc_exports_entry(); if (retval) - goto out_free_idmap; + goto out_free_lockd; retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; @@ -774,30 +1197,33 @@ static int __init init_nfsd(void) out_free_all: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); -out_free_idmap: - nfsd_idmap_shutdown(); out_free_lockd: nfsd_lockd_shutdown(); - nfsd_export_shutdown(); -out_free_cache: nfsd_reply_cache_shutdown(); out_free_stat: nfsd_stat_shutdown(); + nfsd_fault_inject_cleanup(); +out_free_slabs: nfsd4_free_slabs(); +out_unregister_pernet: + unregister_pernet_subsys(&nfsd_net_ops); +out_unregister_notifier: + unregister_cld_notifier(); return retval; } static void __exit exit_nfsd(void) { - nfsd_export_shutdown(); nfsd_reply_cache_shutdown(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); - nfsd_idmap_shutdown(); nfsd4_free_slabs(); + nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); + unregister_pernet_subsys(&nfsd_net_ops); + unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h new file mode 100644 index 00000000000..847daf37e56 --- /dev/null +++ b/fs/nfsd/nfsd.h @@ -0,0 +1,407 @@ +/* + * Hodge-podge collection of knfsd-related stuff. + * I will sort this out later. + * + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef LINUX_NFSD_NFSD_H +#define LINUX_NFSD_NFSD_H + +#include <linux/types.h> +#include <linux/mount.h> + +#include <linux/nfs.h> +#include <linux/nfs2.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/msg_prot.h> + +#include <uapi/linux/nfsd/debug.h> + +#include "stats.h" +#include "export.h" + +#undef ifdebug +#ifdef NFSD_DEBUG +# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) +#else +# define ifdebug(flag) if (0) +#endif + +/* + * nfsd version + */ +#define NFSD_SUPPORTED_MINOR_VERSION 2 +/* + * Maximum blocksizes supported by daemon under various circumstances. + */ +#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD +/* NFSv2 is limited by the protocol specification, see RFC 1094 */ +#define NFSSVC_MAXBLKSIZE_V2 (8*1024) + + +/* + * Largest number of bytes we need to allocate for an NFS + * call or reply. Used to control buffer sizes. We use + * the length of v3 WRITE, READDIR and READDIR replies + * which are an RPC header, up to 26 XDR units of reply + * data, and some page data. + * + * Note that accuracy here doesn't matter too much as the + * size is rounded up to a page size when allocating space. + */ +#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) + +struct readdir_cd { + __be32 err; /* 0, nfserr, or nfserr_eof */ +}; + + +extern struct svc_program nfsd_program; +extern struct svc_version nfsd_version2, nfsd_version3, + nfsd_version4; +extern struct mutex nfsd_mutex; +extern spinlock_t nfsd_drc_lock; +extern unsigned long nfsd_drc_max_mem; +extern unsigned long nfsd_drc_mem_used; + +extern const struct seq_operations nfs_exports_op; + +/* + * Function prototypes. + */ +int nfsd_svc(int nrservs, struct net *net); +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); + +int nfsd_nrthreads(struct net *); +int nfsd_nrpools(struct net *); +int nfsd_get_nrthreads(int n, int *, struct net *); +int nfsd_set_nrthreads(int n, int *, struct net *); +int nfsd_pool_stats_open(struct inode *, struct file *); +int nfsd_pool_stats_release(struct inode *, struct file *); + +void nfsd_destroy(struct net *net); + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +#ifdef CONFIG_NFSD_V2_ACL +extern struct svc_version nfsd_acl_version2; +#else +#define nfsd_acl_version2 NULL +#endif +#ifdef CONFIG_NFSD_V3_ACL +extern struct svc_version nfsd_acl_version3; +#else +#define nfsd_acl_version3 NULL +#endif +#endif + +enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; +int nfsd_vers(int vers, enum vers_op change); +int nfsd_minorversion(u32 minorversion, enum vers_op change); +void nfsd_reset_versions(void); +int nfsd_create_serv(struct net *net); + +extern int nfsd_max_blksize; + +static inline int nfsd_v4client(struct svc_rqst *rq) +{ + return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; +} + +/* + * NFSv4 State + */ +#ifdef CONFIG_NFSD_V4 +extern unsigned long max_delegations; +int nfsd4_init_slabs(void); +void nfsd4_free_slabs(void); +int nfs4_state_start(void); +int nfs4_state_start_net(struct net *net); +void nfs4_state_shutdown(void); +void nfs4_state_shutdown_net(struct net *net); +void nfs4_reset_lease(time_t leasetime); +int nfs4_reset_recoverydir(char *recdir); +char * nfs4_recoverydir(void); +#else +static inline int nfsd4_init_slabs(void) { return 0; } +static inline void nfsd4_free_slabs(void) { } +static inline int nfs4_state_start(void) { return 0; } +static inline int nfs4_state_start_net(struct net *net) { return 0; } +static inline void nfs4_state_shutdown(void) { } +static inline void nfs4_state_shutdown_net(struct net *net) { } +static inline void nfs4_reset_lease(time_t leasetime) { } +static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } +static inline char * nfs4_recoverydir(void) {return NULL; } +#endif + +/* + * lockd binding + */ +void nfsd_lockd_init(void); +void nfsd_lockd_shutdown(void); + + +/* + * These macros provide pre-xdr'ed values for faster operation. + */ +#define nfs_ok cpu_to_be32(NFS_OK) +#define nfserr_perm cpu_to_be32(NFSERR_PERM) +#define nfserr_noent cpu_to_be32(NFSERR_NOENT) +#define nfserr_io cpu_to_be32(NFSERR_IO) +#define nfserr_nxio cpu_to_be32(NFSERR_NXIO) +#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN) +#define nfserr_acces cpu_to_be32(NFSERR_ACCES) +#define nfserr_exist cpu_to_be32(NFSERR_EXIST) +#define nfserr_xdev cpu_to_be32(NFSERR_XDEV) +#define nfserr_nodev cpu_to_be32(NFSERR_NODEV) +#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR) +#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR) +#define nfserr_inval cpu_to_be32(NFSERR_INVAL) +#define nfserr_fbig cpu_to_be32(NFSERR_FBIG) +#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) +#define nfserr_rofs cpu_to_be32(NFSERR_ROFS) +#define nfserr_mlink cpu_to_be32(NFSERR_MLINK) +#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) +#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) +#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) +#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) +#define nfserr_stale cpu_to_be32(NFSERR_STALE) +#define nfserr_remote cpu_to_be32(NFSERR_REMOTE) +#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH) +#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE) +#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC) +#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP) +#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL) +#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT) +#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE) +#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX) +#define nfserr_denied cpu_to_be32(NFSERR_DENIED) +#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK) +#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED) +#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE) +#define nfserr_same cpu_to_be32(NFSERR_SAME) +#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE) +#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID) +#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE) +#define nfserr_moved cpu_to_be32(NFSERR_MOVED) +#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE) +#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH) +#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED) +#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID) +#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID) +#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID) +#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) +#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) +#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) +#define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE) +#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) +#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) +#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) +#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) +#define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER) +#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) +#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) +#define nfserr_grace cpu_to_be32(NFSERR_GRACE) +#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE) +#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD) +#define nfserr_badname cpu_to_be32(NFSERR_BADNAME) +#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN) +#define nfserr_locked cpu_to_be32(NFSERR_LOCKED) +#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC) +#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE) +#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT) +#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST) +#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION) +#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT) +#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY) +#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION) +#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED) +#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY) +#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER) +#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE) +#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT) +#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT) +#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE) +#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED) +#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS) +#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG) +#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG) +#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE) +#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP) +#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND) +#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS) +#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION) +#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP) +#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY) +#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE) +#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY) +#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT) +#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION) +#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP) +#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT) +#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP) +#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED) +#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE) +#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL) +#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG) +#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT) +#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) +#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) +#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) +#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) +#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) +#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) + +/* error codes for internal use */ +/* if a request fails due to kmalloc failure, it gets dropped. + * Client should resend eventually + */ +#define nfserr_dropit cpu_to_be32(30000) +/* end-of-file indicator in readdir */ +#define nfserr_eof cpu_to_be32(30001) +/* replay detected */ +#define nfserr_replay_me cpu_to_be32(11001) +/* nfs41 replay detected */ +#define nfserr_replay_cache cpu_to_be32(11002) + +/* Check for dir entries '.' and '..' */ +#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) + +#ifdef CONFIG_NFSD_V4 + +/* before processing a COMPOUND operation, we have to check that there + * is enough space in the buffer for XDR encode to succeed. otherwise, + * we might process an operation with side effects, and be unable to + * tell the client that the operation succeeded. + * + * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an "ordinary" _successful_ operation. (GETATTR, + * READ, READDIR, and READLINK have their own buffer checks.) if we + * fall below this level, we fail the next operation with NFS4ERR_RESOURCE. + * + * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space + * needed to encode an operation which has failed with NFS4ERR_RESOURCE. + * care is taken to ensure that we never fall below this level for any + * reason. + */ +#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ +#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ + +#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ + +/* + * The following attributes are currently not supported by the NFSv4 server: + * ARCHIVE (deprecated anyway) + * HIDDEN (unlikely to be supported any time soon) + * MIMETYPE (unlikely to be supported any time soon) + * QUOTA_* (will be supported in a forthcoming patch) + * SYSTEM (unlikely to be supported any time soon) + * TIME_BACKUP (unlikely to be supported any time soon) + * TIME_CREATE (unlikely to be supported any time soon) + */ +#define NFSD4_SUPPORTED_ATTRS_WORD0 \ +(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \ + | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \ + | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \ + | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \ + | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \ + | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \ + | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ + | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \ + | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ + | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) + +#define NFSD4_SUPPORTED_ATTRS_WORD1 \ +(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ + | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ + | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ + | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) + +#define NFSD4_SUPPORTED_ATTRS_WORD2 0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ + NFSD4_SUPPORTED_ATTRS_WORD0 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ + NFSD4_SUPPORTED_ATTRS_WORD1 + +#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + +static inline u32 nfsd_suppattrs0(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 + : NFSD4_SUPPORTED_ATTRS_WORD0; +} + +static inline u32 nfsd_suppattrs1(u32 minorversion) +{ + return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1 + : NFSD4_SUPPORTED_ATTRS_WORD1; +} + +static inline u32 nfsd_suppattrs2(u32 minorversion) +{ + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } +} + +/* These will return ERR_INVAL if specified in GETATTR or READDIR. */ +#define NFSD_WRITEONLY_ATTRS_WORD1 \ + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + +/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */ +#define NFSD_WRITEABLE_ATTRS_WORD0 \ + (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) +#define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else +#define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif + +#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ + NFSD_WRITEABLE_ATTRS_WORD0 +/* + * we currently store the exclusive create verifier in the v_{a,m}time + * attributes so the client can't set these at create time using EXCLUSIVE4_1 + */ +#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \ + (NFSD_WRITEABLE_ATTRS_WORD1 & \ + ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)) +#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ + NFSD_WRITEABLE_ATTRS_WORD2 + +extern int nfsd4_is_junction(struct dentry *dentry); +extern int register_cld_notifier(void); +extern void unregister_cld_notifier(void); +#else /* CONFIG_NFSD_V4 */ +static inline int nfsd4_is_junction(struct dentry *dentry) +{ + return 0; +} + +#define register_cld_notifier() 0 +#define unregister_cld_notifier() do { } while(0) + +#endif /* CONFIG_NFSD_V4 */ + +#endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3e6b3f41ee1..ec839341815 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/nfsfh.c - * * NFS server file handle treatment. * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> @@ -9,27 +7,16 @@ * ... and again Southern-Winter 2001 to support export_operations */ -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/unistd.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/dcache.h> #include <linux/exportfs.h> -#include <linux/mount.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/nfsd/nfsd.h> +#include "nfsd.h" +#include "vfs.h" #include "auth.h" #define NFSDDBG_FACILITY NFSDDBG_FH -static int nfsd_nr_verified; -static int nfsd_nr_put; - /* * our acceptability function. * if NOSUBTREECHECK, accept anything @@ -51,7 +38,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = permission(parent->d_inode, MAY_EXEC, NULL); + err = inode_permission(parent->d_inode, MAY_EXEC); if (err < 0) { dput(parent); break; @@ -60,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -72,39 +59,37 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) * the write call). */ static inline __be32 -nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) +nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested) { - /* Type can be negative when creating hardlinks - not to a dir */ - if (type > 0 && (mode & S_IFMT) != type) { - if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) - return nfserr_symlink; - else if (type == S_IFDIR) - return nfserr_notdir; - else if ((mode & S_IFMT) == S_IFDIR) - return nfserr_isdir; - else - return nfserr_inval; - } - if (type < 0 && (mode & S_IFMT) == -type) { - if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) - return nfserr_symlink; - else if (type == -S_IFDIR) - return nfserr_isdir; - else - return nfserr_notdir; - } - return 0; + mode &= S_IFMT; + + if (requested == 0) /* the caller doesn't care */ + return nfs_ok; + if (mode == requested) + return nfs_ok; + /* + * v4 has an error more specific than err_notdir which we should + * return in preference to err_notdir: + */ + if (rqstp->rq_vers == 4 && mode == S_IFLNK) + return nfserr_symlink; + if (requested == S_IFDIR) + return nfserr_notdir; + if (mode == S_IFDIR) + return nfserr_isdir; + return nfserr_inval; } static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, struct svc_export *exp) { + int flags = nfsexp_flags(rqstp, exp); + /* Check if the request originated from a secure port. */ - if (!rqstp->rq_secure && EX_SECURE(exp)) { + if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk(KERN_WARNING - "nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); return nfserr_perm; } @@ -112,177 +97,275 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, return nfserrno(nfsd_setuser(rqstp, exp)); } +static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, + struct dentry *dentry, struct svc_export *exp) +{ + if (!(exp->ex_flags & NFSEXP_V4ROOT)) + return nfs_ok; + /* + * v2/v3 clients have no need for the V4ROOT export--they use + * the mount protocl instead; also, further V4ROOT checks may be + * in v4-specific code, in which case v2/v3 clients could bypass + * them. + */ + if (!nfsd_v4client(rqstp)) + return nfserr_stale; + /* + * We're exposing only the directories and symlinks that have to be + * traversed on the way to real exports: + */ + if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) && + !S_ISLNK(dentry->d_inode->i_mode))) + return nfserr_stale; + /* + * A pseudoroot export gives permission to access only one + * single directory; the kernel has to make another upcall + * before granting access to anything else under it: + */ + if (unlikely(dentry != exp->ex_path.dentry)) + return nfserr_stale; + return nfs_ok; +} + /* - * Perform sanity checks on the dentry in a client's file handle. - * - * Note that the file handle dentry may need to be freed even after - * an error return. - * - * This is only called at the start of an nfsproc call, so fhp points to - * a svc_fh which is all 0 except for the over-the-wire file handle. + * Use the given filehandle to look up the corresponding export and + * dentry. On success, the results are used to set fh_export and + * fh_dentry. */ -__be32 -fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct svc_export *exp = NULL; - struct dentry *dentry; - __be32 error = 0; - - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); + struct fid *fid = NULL, sfid; + struct svc_export *exp; + struct dentry *dentry; + int fileid_type; + int data_left = fh->fh_size/4; + __be32 error; - if (!fhp->fh_dentry) { - struct fid *fid = NULL, sfid; - int fileid_type; - int data_left = fh->fh_size/4; - - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - if (rqstp->rq_vers == 4 && fh->fh_size == 0) - return nfserr_nofilehandle; - - if (fh->fh_version == 1) { - int len; - if (--data_left<0) goto out; - switch (fh->fh_auth_type) { - case 0: break; - default: goto out; - } - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) goto out; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - if ((data_left -= len)<0) goto out; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, - fh->fh_auth); - fid = (struct fid *)(fh->fh_auth + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; - if (fh->fh_size != NFS_FHSIZE) - goto out; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); - } + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + if (rqstp->rq_vers == 4 && fh->fh_size == 0) + return nfserr_nofilehandle; - error = nfserr_stale; - if (PTR_ERR(exp) == -ENOENT) - goto out; + if (fh->fh_version == 1) { + int len; - if (IS_ERR(exp)) { - error = nfserrno(PTR_ERR(exp)); - goto out; + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); + } else { + __u32 tfh[2]; + dev_t xdev; + ino_t xino; + + if (fh->fh_size != NFS_FHSIZE) + return error; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = u32_to_ino_t(fh->ofh_xino); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + } + error = nfserr_stale; + if (PTR_ERR(exp) == -ENOENT) + return error; + + if (IS_ERR(exp)) + return nfserrno(PTR_ERR(exp)); + + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { + /* Elevate privileges so that the lack of 'r' or 'x' + * permission on some parent directory will + * not stop exportfs_decode_fh from being able + * to reconnect a directory into the dentry cache. + * The same problem can affect "SUBTREECHECK" exports, + * but as nfsd_acceptable depends on correct + * access control settings being in effect, we cannot + * fix that case easily. + */ + struct cred *new = prepare_creds(); + if (!new) + return nfserrno(-ENOMEM); + new->cap_effective = + cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + put_cred(override_creds(new)); + put_cred(new); + } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) goto out; + } - /* - * Look up the dentry using the NFS file handle. - */ - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; - - if (fileid_type == FILEID_ROOT) - dentry = dget(exp->ex_path.dentry); - else { - dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); - } - if (dentry == NULL) - goto out; - if (IS_ERR(dentry)) { - if (PTR_ERR(dentry) != -EINVAL) - error = nfserrno(PTR_ERR(dentry)); - goto out; - } + /* + * Look up the dentry using the NFS file handle. + */ + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + + if (fh->fh_version != 1) { + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + } else + fileid_type = fh->fh_fileid_type; + + if (fileid_type == FILEID_ROOT) + dentry = dget(exp->ex_path.dentry); + else { + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + } + if (dentry == NULL) + goto out; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) != -EINVAL) + error = nfserrno(PTR_ERR(dentry)); + goto out; + } - if (S_ISDIR(dentry->d_inode->i_mode) && - (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - } + if (S_ISDIR(dentry->d_inode->i_mode) && + (dentry->d_flags & DCACHE_DISCONNECTED)) { + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", + dentry); + } - fhp->fh_dentry = dentry; - fhp->fh_export = exp; - nfsd_nr_verified++; - cache_get(&exp->h); - } else { - /* - * just rechecking permissions - * (e.g. nfsproc_create calls fh_verify, then nfsd_create - * does as well) - */ - dprintk("nfsd: fh_verify - just checking\n"); - dentry = fhp->fh_dentry; - exp = fhp->fh_export; - cache_get(&exp->h); - /* - * Set user creds for this exportpoint; necessary even - * in the "just checking" case because this may be a - * filehandle that was created by fh_compose, and that - * is about to be used in another nfsv4 compound - * operation. - */ - error = nfsd_setuser_and_check_port(rqstp, exp); + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + return 0; +out: + exp_put(exp); + return error; +} + +/** + * fh_verify - filehandle lookup and access checking + * @rqstp: pointer to current rpc request + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * Look up a dentry from the on-the-wire filehandle, check the client's + * access to the export, and set the current task's credentials. + * + * Regardless of success or failure of fh_verify(), fh_put() should be + * called on @fhp when the caller is finished with the filehandle. + * + * fh_verify() may be called multiple times on a given filehandle, for + * example, when processing an NFSv4 compound. The first call will look + * up a dentry using the on-the-wire filehandle. Subsequent calls will + * skip the lookup and just perform the other checks and possibly change + * the current task's credentials. + * + * @type specifies the type of object expected using one of the S_IF* + * constants defined in include/linux/stat.h. The caller may use zero + * to indicate that it doesn't care, or a negative integer to indicate + * that it expects something not of the given type. + * + * @access is formed from the NFSD_MAY_* constants defined in + * include/linux/nfsd/nfsd.h. + */ +__be32 +fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +{ + struct svc_export *exp; + struct dentry *dentry; + __be32 error; + + dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); + + if (!fhp->fh_dentry) { + error = nfsd_set_fh_dentry(rqstp, fhp); if (error) goto out; } + dentry = fhp->fh_dentry; + exp = fhp->fh_export; + /* + * We still have to do all these permission checks, even when + * fh_dentry is already set: + * - fh_verify may be called multiple times with different + * "access" arguments (e.g. nfsd_proc_create calls + * fh_verify(...,NFSD_MAY_EXEC) first, then later (in + * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE). + * - in the NFSv4 case, the filehandle may have been filled + * in by fh_compose, and given a dentry, but further + * compound operations performed with that filehandle + * still need permissions checks. In the worst case, a + * mountpoint crossing may have changed the export + * options, and we may now need to use a different uid + * (for example, if different id-squashing options are in + * effect on the new filesystem). + */ + error = check_pseudo_root(rqstp, dentry, exp); + if (error) + goto out; + + error = nfsd_setuser_and_check_port(rqstp, exp); + if (error) + goto out; error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); if (error) goto out; - if (!(access & MAY_LOCK)) { - /* - * pseudoflavor restrictions are not enforced on NLM, - * which clients virtually always use auth_sys for, - * even while using RPCSEC_GSS for NFS. - */ - error = check_nfsd_access(exp, rqstp); - if (error) - goto out; - } + /* + * pseudoflavor restrictions are not enforced on NLM, + * which clients virtually always use auth_sys for, + * even while using RPCSEC_GSS for NFS. + */ + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) + goto skip_pseudoflavor_check; + /* + * Clients may expect to be able to use auth_sys during mount, + * even if they use gss for everything else; see section 2.3.2 + * of rfc 2623. + */ + if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT + && exp->ex_path.dentry == dentry) + goto skip_pseudoflavor_check; + error = check_nfsd_access(exp, rqstp); + if (error) + goto out; + +skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %s/%s permission failure, " + dprintk("fh_verify: %pd2 permission failure, " "acc=%x, error=%d\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, + dentry, access, ntohl(error)); } out: - if (exp && !IS_ERR(exp)) - exp_put(exp); if (error == nfserr_stale) nfsdstats.fh_stale++; return error; @@ -301,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, { if (dentry != exp->ex_path.dentry) { struct fid *fid = (struct fid *) - (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1); + (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1); int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK); @@ -327,44 +410,51 @@ static inline void _fh_update_old(struct dentry *dentry, fh->ofh_dirino = 0; } -__be32 -fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, - struct svc_fh *ref_fh) +static bool is_root_export(struct svc_export *exp) { - /* ref_fh is a reference file handle. - * if it is non-null and for the same filesystem, then we should compose - * a filehandle which is of the same version, where possible. - * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * - */ + return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; +} - u8 version; - u8 fsid_type = 0; - struct inode * inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; - __u32 *datap; - dev_t ex_dev = exp->ex_path.dentry->d_inode->i_sb->s_dev; - int root_export = (exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root); +static struct super_block *exp_sb(struct svc_export *exp) +{ + return exp->ex_path.dentry->d_inode->i_sb; +} - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", - MAJOR(ex_dev), MINOR(ex_dev), - (long) exp->ex_path.dentry->d_inode->i_ino, - parent->d_name.name, dentry->d_name.name, - (inode ? inode->i_ino : 0)); +static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) +{ + switch (fsid_type) { + case FSID_DEV: + if (!old_valid_dev(exp_sb(exp)->s_dev)) + return 0; + /* FALL THROUGH */ + case FSID_MAJOR_MINOR: + case FSID_ENCODE_DEV: + return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; + case FSID_NUM: + return exp->ex_flags & NFSEXP_FSID; + case FSID_UUID8: + case FSID_UUID16: + if (!is_root_export(exp)) + return 0; + /* fall through */ + case FSID_UUID4_INUM: + case FSID_UUID16_INUM: + return exp->ex_uuid != NULL; + } + return 1; +} - /* Choose filehandle version and fsid type based on - * the reference filehandle (if it is in the same export) - * or the export options. - */ - retry: + +static void set_version_and_fsid_type(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh *ref_fh) +{ + u8 version; + u8 fsid_type; +retry: version = 1; if (ref_fh && ref_fh->fh_export == exp) { version = ref_fh->fh_handle.fh_version; fsid_type = ref_fh->fh_handle.fh_fsid_type; - if (ref_fh == fhp) - fh_put(ref_fh); ref_fh = NULL; switch (version) { @@ -377,76 +467,82 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, goto retry; } - /* Need to check that this type works for this - * export point. As the fsid -> filesystem mapping - * was guided by user-space, there is no guarantee - * that the filesystem actually supports that fsid - * type. If it doesn't we loop around again without - * ref_fh set. + /* + * As the fsid -> filesystem mapping was guided by + * user-space, there is no guarantee that the filesystem + * actually supports that fsid type. If it doesn't we + * loop around again without ref_fh set. */ - switch(fsid_type) { - case FSID_DEV: - if (!old_valid_dev(ex_dev)) - goto retry; - /* FALL THROUGH */ - case FSID_MAJOR_MINOR: - case FSID_ENCODE_DEV: - if (!(exp->ex_path.dentry->d_inode->i_sb->s_type->fs_flags - & FS_REQUIRES_DEV)) - goto retry; - break; - case FSID_NUM: - if (! (exp->ex_flags & NFSEXP_FSID)) - goto retry; - break; - case FSID_UUID8: - case FSID_UUID16: - if (!root_export) - goto retry; - /* fall through */ - case FSID_UUID4_INUM: - case FSID_UUID16_INUM: - if (exp->ex_uuid == NULL) - goto retry; - break; - } + if (!fsid_type_ok_for_exp(fsid_type, exp)) + goto retry; + } else if (exp->ex_flags & NFSEXP_FSID) { + fsid_type = FSID_NUM; } else if (exp->ex_uuid) { if (fhp->fh_maxsize >= 64) { - if (root_export) + if (is_root_export(exp)) fsid_type = FSID_UUID16; else fsid_type = FSID_UUID16_INUM; } else { - if (root_export) + if (is_root_export(exp)) fsid_type = FSID_UUID8; else fsid_type = FSID_UUID4_INUM; } - } else if (exp->ex_flags & NFSEXP_FSID) - fsid_type = FSID_NUM; - else if (!old_valid_dev(ex_dev)) + } else if (!old_valid_dev(exp_sb(exp)->s_dev)) /* for newer device numbers, we must use a newer fsid format */ fsid_type = FSID_ENCODE_DEV; else fsid_type = FSID_DEV; + fhp->fh_handle.fh_version = version; + if (version) + fhp->fh_handle.fh_fsid_type = fsid_type; +} + +__be32 +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, + struct svc_fh *ref_fh) +{ + /* ref_fh is a reference file handle. + * if it is non-null and for the same filesystem, then we should compose + * a filehandle which is of the same version, where possible. + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca + * Then create a 32byte filehandle using nfs_fhbase_old + * + */ + + struct inode * inode = dentry->d_inode; + dev_t ex_dev = exp_sb(exp)->s_dev; + + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", + MAJOR(ex_dev), MINOR(ex_dev), + (long) exp->ex_path.dentry->d_inode->i_ino, + dentry, + (inode ? inode->i_ino : 0)); + + /* Choose filehandle version and fsid type based on + * the reference filehandle (if it is in the same export) + * or the export options. + */ + set_version_and_fsid_type(fhp, exp, ref_fh); if (ref_fh == fhp) fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", - parent->d_name.name, dentry->d_name.name); + printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", + dentry); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", fhp->fh_maxsize, - parent->d_name.name, dentry->d_name.name); + dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; cache_get(&exp->h); - if (version == 0xca) { + if (fhp->fh_handle.fh_version == 0xca) { /* old style filehandle please */ memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; @@ -459,26 +555,24 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, if (inode) _fh_update_old(dentry, exp, &fhp->fh_handle); } else { - int len; - fhp->fh_handle.fh_version = 1; + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; fhp->fh_handle.fh_auth_type = 0; - datap = fhp->fh_handle.fh_auth+0; - fhp->fh_handle.fh_fsid_type = fsid_type; - mk_fsid(fsid_type, datap, ex_dev, + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, exp->ex_path.dentry->d_inode->i_ino, exp->ex_fsid, exp->ex_uuid); - len = key_len(fsid_type); - datap += len/4; - fhp->fh_handle.fh_size = 4 + len; - if (inode) _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); return nfserr_opnotsupp; + } } - nfsd_nr_verified++; return 0; } @@ -501,22 +595,20 @@ fh_update(struct svc_fh *fhp) _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - goto out; + return 0; _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == 255) + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } -out: return 0; - out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); - goto out; + return nfserr_serverfault; out_negative: - printk(KERN_ERR "fh_update: %s/%s still negative!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; + printk(KERN_ERR "fh_update: %pd2 still negative!\n", + dentry); + return nfserr_serverfault; } /* @@ -535,10 +627,10 @@ fh_put(struct svc_fh *fhp) fhp->fh_pre_saved = 0; fhp->fh_post_saved = 0; #endif - nfsd_nr_put++; } + fh_drop_write(fhp); if (exp) { - cache_put(&exp->h, &svc_export_cache); + exp_put(exp); fhp->fh_export = NULL; } return; @@ -571,8 +663,7 @@ enum fsid_source fsid_source(struct svc_fh *fhp) case FSID_DEV: case FSID_ENCODE_DEV: case FSID_MAJOR_MINOR: - if (fhp->fh_export->ex_path.dentry->d_inode->i_sb->s_type->fs_flags - & FS_REQUIRES_DEV) + if (exp_sb(fhp->fh_export)->s_type->fs_flags & FS_REQUIRES_DEV) return FSIDSOURCE_DEV; break; case FSID_NUM: diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h new file mode 100644 index 00000000000..2e89e70ac15 --- /dev/null +++ b/fs/nfsd/nfsfh.h @@ -0,0 +1,267 @@ +/* + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * This file describes the layout of the file handles as passed + * over the wire. + */ +#ifndef _LINUX_NFSD_NFSFH_H +#define _LINUX_NFSD_NFSFH_H + +#include <linux/sunrpc/svc.h> +#include <uapi/linux/nfsd/nfsfh.h> + +static inline __u32 ino_t_to_u32(ino_t ino) +{ + return (__u32) ino; +} + +static inline ino_t u32_to_ino_t(__u32 uino) +{ + return (ino_t) uino; +} + +/* + * This is the internal representation of an NFS handle used in knfsd. + * pre_mtime/post_version will be used to support wcc_attr's in NFSv3. + */ +typedef struct svc_fh { + struct knfsd_fh fh_handle; /* FH data */ + struct dentry * fh_dentry; /* validated dentry */ + struct svc_export * fh_export; /* export pointer */ + int fh_maxsize; /* max size for fh_handle */ + + unsigned char fh_locked; /* inode locked by us */ + unsigned char fh_want_write; /* remount protection taken */ + +#ifdef CONFIG_NFSD_V3 + unsigned char fh_post_saved; /* post-op attrs saved */ + unsigned char fh_pre_saved; /* pre-op attrs saved */ + + /* Pre-op attributes saved during fh_lock */ + __u64 fh_pre_size; /* size before operation */ + struct timespec fh_pre_mtime; /* mtime before oper */ + struct timespec fh_pre_ctime; /* ctime before oper */ + /* + * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) + * to find out if it is valid. + */ + u64 fh_pre_change; + + /* Post-op attributes saved in fh_unlock */ + struct kstat fh_post_attr; /* full attrs after operation */ + u64 fh_post_change; /* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ + +} svc_fh; + +enum nfsd_fsid { + FSID_DEV = 0, + FSID_NUM, + FSID_MAJOR_MINOR, + FSID_ENCODE_DEV, + FSID_UUID4_INUM, + FSID_UUID8, + FSID_UUID16, + FSID_UUID16_INUM, +}; + +enum fsid_source { + FSIDSOURCE_DEV, + FSIDSOURCE_FSID, + FSIDSOURCE_UUID, +}; +extern enum fsid_source fsid_source(struct svc_fh *fhp); + + +/* This might look a little large to "inline" but in all calls except + * one, 'vers' is constant so moste of the function disappears. + */ +static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, + u32 fsid, unsigned char *uuid) +{ + u32 *up; + switch(vers) { + case FSID_DEV: + fsidv[0] = htonl((MAJOR(dev)<<16) | + MINOR(dev)); + fsidv[1] = ino_t_to_u32(ino); + break; + case FSID_NUM: + fsidv[0] = fsid; + break; + case FSID_MAJOR_MINOR: + fsidv[0] = htonl(MAJOR(dev)); + fsidv[1] = htonl(MINOR(dev)); + fsidv[2] = ino_t_to_u32(ino); + break; + + case FSID_ENCODE_DEV: + fsidv[0] = new_encode_dev(dev); + fsidv[1] = ino_t_to_u32(ino); + break; + + case FSID_UUID4_INUM: + /* 4 byte fsid and inode number */ + up = (u32*)uuid; + fsidv[0] = ino_t_to_u32(ino); + fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3]; + break; + + case FSID_UUID8: + /* 8 byte fsid */ + up = (u32*)uuid; + fsidv[0] = up[0] ^ up[2]; + fsidv[1] = up[1] ^ up[3]; + break; + + case FSID_UUID16: + /* 16 byte fsid - NFSv3+ only */ + memcpy(fsidv, uuid, 16); + break; + + case FSID_UUID16_INUM: + /* 8 byte inode and 16 byte fsid */ + *(u64*)fsidv = (u64)ino; + memcpy(fsidv+2, uuid, 16); + break; + default: BUG(); + } +} + +static inline int key_len(int type) +{ + switch(type) { + case FSID_DEV: return 8; + case FSID_NUM: return 4; + case FSID_MAJOR_MINOR: return 12; + case FSID_ENCODE_DEV: return 8; + case FSID_UUID4_INUM: return 8; + case FSID_UUID8: return 8; + case FSID_UUID16: return 16; + case FSID_UUID16_INUM: return 24; + default: return 0; + } +} + +/* + * Shorthand for dprintk()'s + */ +extern char * SVCFH_fmt(struct svc_fh *fhp); + +/* + * Function prototypes + */ +__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int); +__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); +__be32 fh_update(struct svc_fh *); +void fh_put(struct svc_fh *); + +static __inline__ struct svc_fh * +fh_copy(struct svc_fh *dst, struct svc_fh *src) +{ + WARN_ON(src->fh_dentry || src->fh_locked); + + *dst = *src; + return dst; +} + +static inline void +fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) +{ + dst->fh_size = src->fh_size; + memcpy(&dst->fh_base, &src->fh_base, src->fh_size); +} + +static __inline__ struct svc_fh * +fh_init(struct svc_fh *fhp, int maxsize) +{ + memset(fhp, 0, sizeof(*fhp)); + fhp->fh_maxsize = maxsize; + return fhp; +} + +#ifdef CONFIG_NFSD_V3 +/* + * The wcc data stored in current_fh should be cleared + * between compound ops. + */ +static inline void +fh_clear_wcc(struct svc_fh *fhp) +{ + fhp->fh_post_saved = 0; + fhp->fh_pre_saved = 0; +} + +/* + * Fill in the pre_op attr for the wcc data + */ +static inline void +fill_pre_wcc(struct svc_fh *fhp) +{ + struct inode *inode; + + inode = fhp->fh_dentry->d_inode; + if (!fhp->fh_pre_saved) { + fhp->fh_pre_mtime = inode->i_mtime; + fhp->fh_pre_ctime = inode->i_ctime; + fhp->fh_pre_size = inode->i_size; + fhp->fh_pre_change = inode->i_version; + fhp->fh_pre_saved = 1; + } +} + +extern void fill_post_wcc(struct svc_fh *); +#else +#define fh_clear_wcc(ignored) +#define fill_pre_wcc(ignored) +#define fill_post_wcc(notused) +#endif /* CONFIG_NFSD_V3 */ + + +/* + * Lock a file handle/inode + * NOTE: both fh_lock and fh_unlock are done "by hand" in + * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once + * so, any changes here should be reflected there. + */ + +static inline void +fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) +{ + struct dentry *dentry = fhp->fh_dentry; + struct inode *inode; + + BUG_ON(!dentry); + + if (fhp->fh_locked) { + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); + return; + } + + inode = dentry->d_inode; + mutex_lock_nested(&inode->i_mutex, subclass); + fill_pre_wcc(fhp); + fhp->fh_locked = 1; +} + +static inline void +fh_lock(struct svc_fh *fhp) +{ + fh_lock_nested(fhp, I_MUTEX_NORMAL); +} + +/* + * Unlock a file handle/inode + */ +static inline void +fh_unlock(struct svc_fh *fhp) +{ + if (fhp->fh_locked) { + fill_post_wcc(fhp); + mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); + fhp->fh_locked = 0; + } +} + +#endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6cfc96a1248..54c6b3d3cc7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -1,29 +1,14 @@ /* - * nfsproc2.c Process version 2 NFS requests. - * linux/fs/nfsd/nfs2proc.c - * * Process version 2 NFS requests. * * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/linkage.h> -#include <linux/time.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/in.h> #include <linux/namei.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> +#include "cache.h" +#include "xdr.h" +#include "vfs.h" typedef struct svc_rqst svc_rqst; typedef struct svc_buf svc_buf; @@ -41,17 +26,13 @@ static __be32 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } static __be32 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) { if (err) return err; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* * Get a file's attributes @@ -65,7 +46,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); return nfsd_return_attrs(nfserr, resp); } @@ -158,15 +140,13 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; - nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, + nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, argp->vlen, &resp->count); if (nfserr) return nfserr; - return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, - resp->fh.fh_dentry, - &resp->stat)); + return fh_getattr(&resp->fh, &resp->stat); } /* @@ -179,6 +159,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, { __be32 nfserr; int stable = 1; + unsigned long cnt = argp->len; dprintk("nfsd: WRITE %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), @@ -187,7 +168,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, argp->offset, rqstp->rq_vec, argp->vlen, - argp->len, + &cnt, &stable); return nfsd_return_attrs(nfserr, resp); } @@ -209,17 +190,18 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct dentry *dchild; int type, mode; __be32 nfserr; + int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); dprintk("nfsd: CREATE %s %.*s\n", SVCFH_fmt(dirfhp), argp->len, argp->name); /* First verify the parent file handle */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC); + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); if (nfserr) goto done; /* must fh_put dirfhp even on error */ - /* Check for MAY_WRITE in nfsd_create if necessary */ + /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */ nfserr = nfserr_acces; if (!argp->len) @@ -227,6 +209,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; + hosterr = fh_want_write(dirfhp); + if (hosterr) { + nfserr = nfserrno(hosterr); + goto done; + } + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { @@ -281,7 +269,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, nfserr = nfsd_permission(rqstp, newfhp->fh_export, newfhp->fh_dentry, - MAY_WRITE|MAY_LOCAL_ACCESS); + NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); if (nfserr && nfserr != nfserr_rofs) goto out_unlock; } @@ -303,7 +291,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, * gospel of sun micro */ if (type != S_IFREG) { - int is_borc = 0; if (type != S_IFBLK && type != S_IFCHR) { rdev = 0; } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { @@ -311,7 +298,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, type = S_IFIFO; } else { /* Okay, char or block special */ - is_borc = 1; if (!rdev) rdev = wanted; } @@ -345,7 +331,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, out_unlock: /* We don't really need to unlock, as fh_put does it. */ fh_unlock(dirfhp); - + fh_drop_write(dirfhp); done: fh_put(dirfhp); return nfsd_return_dirop(nfserr, resp); @@ -521,7 +507,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, + NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); return nfserr; } @@ -530,45 +517,179 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. */ -#define nfsd_proc_none NULL -#define nfssvc_release_none NULL struct nfsd_void { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd_proc_##name, \ - (kxdrproc_t) nfssvc_decode_##argt, \ - (kxdrproc_t) nfssvc_encode_##rest, \ - (kxdrproc_t) nfssvc_release_##relt, \ - sizeof(struct nfsd_##argt), \ - sizeof(struct nfsd_##rest), \ - 0, \ - cache, \ - respsize, \ - } - #define ST 1 /* status */ #define FH 8 /* filehandle */ #define AT 18 /* attributes */ static struct svc_procedure nfsd_procedures2[18] = { - PROC(null, void, void, none, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), - PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), - PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(remove, diropargs, void, none, RC_REPLSTAT, ST), - PROC(rename, renameargs, void, none, RC_REPLSTAT, ST), - PROC(link, linkargs, void, none, RC_REPLSTAT, ST), - PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST), - PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST), - PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0), - PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5), + [NFSPROC_NULL] = { + .pc_func = (svc_procfunc) nfsd_proc_null, + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd_proc_getattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd_proc_setattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_ROOT] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd_proc_lookup, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_readlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres, + .pc_argsize = sizeof(struct nfsd_readlinkargs), + .pc_ressize = sizeof(struct nfsd_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, + }, + [NFSPROC_READ] = { + .pc_func = (svc_procfunc) nfsd_proc_read, + .pc_decode = (kxdrproc_t) nfssvc_decode_readargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_readargs), + .pc_ressize = sizeof(struct nfsd_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + }, + [NFSPROC_WRITECACHE] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd_proc_write, + .pc_decode = (kxdrproc_t) nfssvc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd_proc_create, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd_proc_remove, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd_proc_rename, + .pc_decode = (kxdrproc_t) nfssvc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_renameargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_LINK] = { + .pc_func = (svc_procfunc) nfsd_proc_link, + .pc_decode = (kxdrproc_t) nfssvc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_linkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_symlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_symlinkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_mkdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_rmdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_readdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readdirres, + .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_ressize = sizeof(struct nfsd_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFSPROC_STATFS] = { + .pc_func = (svc_procfunc) nfsd_proc_statfs, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_statfsres, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_statfsres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+5, + }, }; @@ -614,11 +735,14 @@ nfserrno (int errno) #endif { nfserr_stale, -ESTALE }, { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_dropit, -EAGAIN }, - { nfserr_dropit, -ENOMEM }, - { nfserr_badname, -ESRCH }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, { nfserr_io, -ETXTBSY }, { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, }; int i; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9647b0f7bc0..1879e43f286 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/nfssvc.c - * * Central processing for nfsd. * * Authors: Olaf Kirch (okir@monad.swb.de) @@ -8,55 +6,61 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/module.h> #include <linux/sched.h> -#include <linux/time.h> -#include <linux/errno.h> -#include <linux/nfs.h> -#include <linux/in.h> -#include <linux/uio.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/freezer.h> +#include <linux/module.h> #include <linux/fs_struct.h> +#include <linux/swap.h> -#include <linux/sunrpc/types.h> #include <linux/sunrpc/stats.h> -#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> -#include <linux/sunrpc/cache.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/stats.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/syscall.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> +#include <linux/seq_file.h> +#include <net/net_namespace.h> +#include "nfsd.h" +#include "cache.h" +#include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_SVC -/* these signals will be delivered to an nfsd thread - * when handling a request - */ -#define ALLOWED_SIGS (sigmask(SIGKILL)) -/* these signals will be delivered to an nfsd thread - * when not handling a request. i.e. when waiting - */ -#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT)) -/* if the last thread dies with SIGHUP, then the exports table is - * left unchanged ( like 2.4-{0-9} ). Any other signal will clear - * the exports table (like 2.2). +extern struct svc_program nfsd_program; +static int nfsd(void *vrqstp); + +/* + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members + * of the svc_serv struct. In particular, ->sv_nrthreads but also to some + * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * + * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number + * of nfsd threads must exist and each must listed in ->sp_all_threads in each + * entry of ->sv_pools[]. + * + * Transitions of the thread count between zero and non-zero are of particular + * interest since the svc_serv needs to be created and initialized at that + * point, or freed. + * + * Finally, the nfsd_mutex also protects some of the global variables that are + * accessed when nfsd starts and that are settable via the write_* routines in + * nfsctl.c. In particular: + * + * user_recovery_dirname + * user_lease_time + * nfsd_versions */ -#define SIG_NOCLEAN SIGHUP +DEFINE_MUTEX(nfsd_mutex); -extern struct svc_program nfsd_program; -static void nfsd(struct svc_rqst *rqstp); -struct timeval nfssvc_boot; - struct svc_serv *nfsd_serv; -static atomic_t nfsd_busy; -static unsigned long nfsd_last_call; -static DEFINE_SPINLOCK(nfsd_call_lock); +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. + */ +spinlock_t nfsd_drc_lock; +unsigned long nfsd_drc_max_mem; +unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -112,10 +116,15 @@ struct svc_program nfsd_program = { }; +static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { + [0] = 1, + [1] = 1, +}; + int nfsd_vers(int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) - return -1; + return 0; switch(change) { case NFSD_SET: nfsd_versions[vers] = nfsd_version[vers]; @@ -138,35 +147,180 @@ int nfsd_vers(int vers, enum vers_op change) } return 0; } + +int nfsd_minorversion(u32 minorversion, enum vers_op change) +{ + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + return -1; + switch(change) { + case NFSD_SET: + nfsd_supported_minorversions[minorversion] = true; + break; + case NFSD_CLEAR: + nfsd_supported_minorversions[minorversion] = false; + break; + case NFSD_TEST: + return nfsd_supported_minorversions[minorversion]; + case NFSD_AVAIL: + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + } + return 0; +} + /* * Maximum number of nfsd processes */ #define NFSD_MAXSERVS 8192 -int nfsd_nrthreads(void) +int nfsd_nrthreads(struct net *net) { - if (nfsd_serv == NULL) + int rv = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv) + rv = nn->nfsd_serv->sv_nrthreads; + mutex_unlock(&nfsd_mutex); + return rv; +} + +static int nfsd_init_socks(struct net *net) +{ + int error; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - else - return nfsd_serv->sv_nrthreads; + + error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS); + if (error < 0) + return error; + + error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS); + if (error < 0) + return error; + + return 0; } -static int killsig; /* signal that was used to kill last nfsd */ -static void nfsd_last_thread(struct svc_serv *serv) +static int nfsd_users = 0; + +static int nfsd_startup_generic(int nrservs) { - /* When last nfsd thread exits we need to do some clean-up */ - struct svc_xprt *xprt; - list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) - lockd_down(); - nfsd_serv = NULL; + int ret; + + if (nfsd_users++) + return 0; + + /* + * Readahead param cache - will no-op if it already exists. + * (Note therefore results will be suboptimal if number of + * threads is modified after nfsd start.) + */ + ret = nfsd_racache_init(2*nrservs); + if (ret) + return ret; + ret = nfs4_state_start(); + if (ret) + goto out_racache; + return 0; + +out_racache: nfsd_racache_shutdown(); + return ret; +} + +static void nfsd_shutdown_generic(void) +{ + if (--nfsd_users) + return; + nfs4_state_shutdown(); + nfsd_racache_shutdown(); +} + +static bool nfsd_needs_lockd(void) +{ +#if defined(CONFIG_NFSD_V3) + return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL); +#else + return (nfsd_versions[2] != NULL); +#endif +} + +static int nfsd_startup_net(int nrservs, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int ret; + + if (nn->nfsd_net_up) + return 0; + + ret = nfsd_startup_generic(nrservs); + if (ret) + return ret; + ret = nfsd_init_socks(net); + if (ret) + goto out_socks; + + if (nfsd_needs_lockd() && !nn->lockd_up) { + ret = lockd_up(net); + if (ret) + goto out_socks; + nn->lockd_up = 1; + } + + ret = nfs4_state_start_net(net); + if (ret) + goto out_lockd; - printk(KERN_WARNING "nfsd: last server has exited\n"); - if (killsig != SIG_NOCLEAN) { - printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); - nfsd_export_flush(); + nn->nfsd_net_up = true; + return 0; + +out_lockd: + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; } +out_socks: + nfsd_shutdown_generic(); + return ret; +} + +static void nfsd_shutdown_net(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs4_state_shutdown_net(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = 0; + } + nn->nfsd_net_up = false; + nfsd_shutdown_generic(); +} + +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + /* + * write_ports can create the server without actually starting + * any threads--if we get shut down before any threads are + * started, then nfsd_last_thread will be run before any of this + * other initialization has been done. + */ + if (!nn->nfsd_net_up) + return; + nfsd_shutdown_net(net); + + svc_rpcb_cleanup(serv, net); + + printk(KERN_WARNING "nfsd: last server has exited, flushing export " + "cache\n"); + nfsd_export_flush(net); } void nfsd_reset_versions(void) @@ -190,105 +344,127 @@ void nfsd_reset_versions(void) } } -int nfsd_create_serv(void) +/* + * Each session guarantees a negotiated per slot memory cache for replies + * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated + * NFSv4.1 server might want to use more memory for a DRC than a machine + * with mutiple services. + * + * Impose a hard limit on the number of pages for the DRC which varies + * according to the machines free pages. This is of course only a default. + * + * For now this is a #defined shift which could be under admin control + * in the future. + */ +static void set_max_drc(void) { - int err = 0; - lock_kernel(); - if (nfsd_serv) { - svc_get(nfsd_serv); - unlock_kernel(); - return 0; - } - if (nfsd_max_blksize == 0) { - /* choose a suitable default */ - struct sysinfo i; - si_meminfo(&i); - /* Aim for 1/4096 of memory per thread - * This gives 1MB on 4Gig machines - * But only uses 32K on 128M machines. - * Bottom out at 8K on 32M and smaller. - * Of course, this is only a default. - */ - nfsd_max_blksize = NFSSVC_MAXBLKSIZE; - i.totalram <<= PAGE_SHIFT - 12; - while (nfsd_max_blksize > i.totalram && - nfsd_max_blksize >= 8*1024*2) - nfsd_max_blksize /= 2; - } + #define NFSD_DRC_SIZE_SHIFT 10 + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); +} - atomic_set(&nfsd_busy, 0); - nfsd_serv = svc_create_pooled(&nfsd_program, - nfsd_max_blksize, - nfsd_last_thread, - nfsd, SIG_NOCLEAN, THIS_MODULE); - if (nfsd_serv == NULL) - err = -ENOMEM; - unlock_kernel(); - do_gettimeofday(&nfssvc_boot); /* record boot time */ - return err; +static int nfsd_get_default_max_blksize(void) +{ + struct sysinfo i; + unsigned long long target; + unsigned long ret; + + si_meminfo(&i); + target = (i.totalram - i.totalhigh) << PAGE_SHIFT; + /* + * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig + * machines, but only uses 32K on 128M machines. Bottom out at + * 8K on 32M and smaller. Of course, this is only a default. + */ + target >>= 12; + + ret = NFSSVC_MAXBLKSIZE; + while (ret > target && ret >= 8*1024*2) + ret /= 2; + return ret; } -static int nfsd_init_socks(int port) +int nfsd_create_serv(struct net *net) { int error; - if (!list_empty(&nfsd_serv->sv_permsocks)) - return 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - error = lockd_up(IPPROTO_UDP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "udp", port, - SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + if (nn->nfsd_serv) { + svc_get(nn->nfsd_serv); + return 0; } - if (error < 0) + if (nfsd_max_blksize == 0) + nfsd_max_blksize = nfsd_get_default_max_blksize(); + nfsd_reset_versions(); + nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nfsd_last_thread, nfsd, THIS_MODULE); + if (nn->nfsd_serv == NULL) + return -ENOMEM; + + error = svc_bind(nn->nfsd_serv, net); + if (error < 0) { + svc_destroy(nn->nfsd_serv); return error; - -#ifdef CONFIG_NFSD_TCP - error = lockd_up(IPPROTO_TCP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "tcp", port, - SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); } - if (error < 0) - return error; -#endif + + set_max_drc(); + do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ return 0; } -int nfsd_nrpools(void) +int nfsd_nrpools(struct net *net) { - if (nfsd_serv == NULL) + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + if (nn->nfsd_serv == NULL) return 0; else - return nfsd_serv->sv_nrpools; + return nn->nfsd_serv->sv_nrpools; } -int nfsd_get_nrthreads(int n, int *nthreads) +int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (nfsd_serv != NULL) { - for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++) - nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads; + if (nn->nfsd_serv != NULL) { + for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++) + nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads; } return 0; } -int nfsd_set_nrthreads(int n, int *nthreads) +void nfsd_destroy(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int destroy = (nn->nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nn->nfsd_serv, net); + svc_destroy(nn->nfsd_serv); + if (destroy) + nn->nfsd_serv = NULL; +} + +int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; int tot = 0; int err = 0; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - if (nfsd_serv == NULL || n <= 0) + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + + if (nn->nfsd_serv == NULL || n <= 0) return 0; - if (n > nfsd_serv->sv_nrpools) - n = nfsd_serv->sv_nrpools; + if (n > nn->nfsd_serv->sv_nrpools) + n = nn->nfsd_serv->sv_nrpools; /* enforce a global maximum number of threads */ tot = 0; @@ -318,178 +494,140 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - lock_kernel(); - svc_get(nfsd_serv); + svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], + err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } - svc_destroy(nfsd_serv); - unlock_kernel(); - + nfsd_destroy(net); return err; } +/* + * Adjust the number of threads and return the new number of threads. + * This is also the function that starts the server if necessary, if + * this is the first time nrservs is nonzero. + */ int -nfsd_svc(unsigned short port, int nrservs) +nfsd_svc(int nrservs, struct net *net) { int error; - - lock_kernel(); + bool nfsd_up_before; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - error = -EINVAL; if (nrservs <= 0) nrservs = 0; if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; - - /* Readahead param cache - will no-op if it already exists */ - error = nfsd_racache_init(2*nrservs); - if (error<0) + error = 0; + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - nfs4_state_start(); - nfsd_reset_versions(); + error = nfsd_create_serv(net); + if (error) + goto out; - error = nfsd_create_serv(); + nfsd_up_before = nn->nfsd_net_up; + error = nfsd_startup_net(nrservs, net); if (error) - goto out; - error = nfsd_init_socks(port); + goto out_destroy; + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) - goto failure; - - error = svc_set_num_threads(nfsd_serv, NULL, nrservs); - failure: - svc_destroy(nfsd_serv); /* Release server */ - out: - unlock_kernel(); + goto out_shutdown; + /* We are holding a reference to nn->nfsd_serv which + * we don't want to count in the return value, + * so subtract 1 + */ + error = nn->nfsd_serv->sv_nrthreads - 1; +out_shutdown: + if (error < 0 && !nfsd_up_before) + nfsd_shutdown_net(net); +out_destroy: + nfsd_destroy(net); /* Release server */ +out: + mutex_unlock(&nfsd_mutex); return error; } -static inline void -update_thread_usage(int busy_threads) -{ - unsigned long prev_call; - unsigned long diff; - int decile; - - spin_lock(&nfsd_call_lock); - prev_call = nfsd_last_call; - nfsd_last_call = jiffies; - decile = busy_threads*10/nfsdstats.th_cnt; - if (decile>0 && decile <= 10) { - diff = nfsd_last_call - prev_call; - if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) - nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; - if (decile == 10) - nfsdstats.th_fullcnt++; - } - spin_unlock(&nfsd_call_lock); -} /* * This is the NFS server kernel thread */ -static void -nfsd(struct svc_rqst *rqstp) +static int +nfsd(void *vrqstp) { - struct fs_struct *fsp; - int err; - sigset_t shutdown_mask, allowed_mask; + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; + struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); + struct net *net = perm_sock->xpt_net; + int err; /* Lock module and set up kernel thread */ - lock_kernel(); - daemonize("nfsd"); + mutex_lock(&nfsd_mutex); - /* After daemonize() this kernel thread shares current->fs + /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ - fsp = copy_fs_struct(current->fs); - if (!fsp) { + if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } - exit_fs(current); - current->fs = fsp; + current->fs->umask = 0; - siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); - siginitsetinv(&allowed_mask, ALLOWED_SIGS); + /* + * thread is spawned with all signals set to SIG_IGN, re-enable + * the ones that will bring down the thread + */ + allow_signal(SIGKILL); + allow_signal(SIGHUP); + allow_signal(SIGINT); + allow_signal(SIGQUIT); nfsdstats.th_cnt++; + mutex_unlock(&nfsd_mutex); - rqstp->rq_task = current; - - unlock_kernel(); - - /* - * We want less throttling in balance_dirty_pages() so that nfs to - * localhost doesn't cause nfsd to lock up due to all the client's - * dirty pages. - */ - current->flags |= PF_LESS_THROTTLE; set_freezable(); /* * The main request loop */ for (;;) { - /* Block all but the shutdown signals */ - sigprocmask(SIG_SETMASK, &shutdown_mask, NULL); - /* * Find a socket with data available and call its * recvfrom routine. */ while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) ; - if (err < 0) + if (err == -EINTR) break; - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_inc(&nfsd_busy); - - /* Lock the export hash tables for reading. */ - exp_readlock(); - - /* Process request with signals blocked. */ - sigprocmask(SIG_SETMASK, &allowed_mask, NULL); - + validate_process_creds(); svc_process(rqstp); - - /* Unlock export hash tables */ - exp_readunlock(); - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_dec(&nfsd_busy); + validate_process_creds(); } - if (err != -EINTR) { - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); - } else { - unsigned int signo; - - for (signo = 1; signo <= _NSIG; signo++) - if (sigismember(¤t->pending.signal, signo) && - !sigismember(¤t->blocked, signo)) - break; - killsig = signo; - } /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - lock_kernel(); - + mutex_lock(&nfsd_mutex); nfsdstats.th_cnt --; out: + rqstp->rq_server = NULL; + /* Release the thread */ svc_exit_thread(rqstp); + nfsd_destroy(net); + /* Release module */ - unlock_kernel(); + mutex_unlock(&nfsd_mutex); module_put_and_exit(0); + return 0; } static __be32 map_new_errors(u32 vers, __be32 nfserr) @@ -513,27 +651,30 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; - /* Check whether we have this call in the cache. */ - switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) { - case RC_INTR: - case RC_DROPIT: - return 0; - case RC_REPLY: - return 1; - case RC_DOIT:; - /* do it */ - } - + /* + * Give the xdr decoder a chance to change this if it wants + * (necessary in the NFSv4.0 compound case) + */ + rqstp->rq_cachetype = proc->pc_cachetype; /* Decode arguments */ xdr = proc->pc_decode; if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base, rqstp->rq_argp)) { dprintk("nfsd: failed to decode arguments!\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); *statp = rpc_garbage_args; return 1; } + /* Check whether we have this call in the cache. */ + switch (nfsd_cache_lookup(rqstp)) { + case RC_DROPIT: + return 0; + case RC_REPLY: + return 1; + case RC_DOIT:; + /* do it */ + } + /* need to grab the location to store the status, as * nfsv4 does some encoding while processing */ @@ -544,7 +685,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); - if (nfserr == nfserr_dropit) { + if (nfserr == nfserr_dropit || rqstp->rq_dropme) { dprintk("nfsd: Dropping request; may be revisited later\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); return 0; @@ -569,6 +710,35 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. */ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } + +int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + int ret; + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); + + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv == NULL) { + mutex_unlock(&nfsd_mutex); + return -ENODEV; + } + /* bump up the psudo refcount while traversing */ + svc_get(nn->nfsd_serv); + ret = svc_pool_stats_open(nn->nfsd_serv, file); + mutex_unlock(&nfsd_mutex); + return ret; +} + +int nfsd_pool_stats_release(struct inode *inode, struct file *file) +{ + int ret = seq_release(inode, file); + struct net *net = inode->i_sb->s_fs_info; + + mutex_lock(&nfsd_mutex); + /* this function really, really should have been called svc_put() */ + nfsd_destroy(net); + mutex_unlock(&nfsd_mutex); + return ret; +} diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index afd08e2c90a..1ac306b769d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -1,20 +1,11 @@ /* - * linux/fs/nfsd/nfsxdr.c - * * XDR support for nfsd * * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/types.h> -#include <linux/time.h> -#include <linux/nfs.h> -#include <linux/vfs.h> -#include <linux/sunrpc/xdr.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/xdr.h> -#include <linux/mm.h> +#include "vfs.h" +#include "xdr.h" #include "auth.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -110,12 +101,14 @@ decode_sattr(__be32 *p, struct iattr *iap) iap->ia_mode = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + iap->ia_uid = make_kuid(&init_user_ns, tmp); + if (uid_valid(iap->ia_uid)) + iap->ia_valid |= ATTR_UID; } if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + iap->ia_gid = make_kgid(&init_user_ns, tmp); + if (gid_valid(iap->ia_gid)) + iap->ia_valid |= ATTR_GID; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; @@ -161,8 +154,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl(nfs_ftypes[type >> 12]); *p++ = htonl((u32) stat->mode); *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); @@ -204,11 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, } /* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { - struct kstat stat; - vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat); - return encode_fattr(rqstp, p, fhp, &stat); + return encode_fattr(rqstp, p, fhp, stat); } /* @@ -223,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) int nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; return xdr_argsize_check(rqstp, p); } @@ -256,8 +248,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readargs *args) { unsigned int len; - int v,pn; - if (!(p = decode_fh(p, &args->fh))) + int v; + p = decode_fh(p, &args->fh); + if (!p) return 0; args->offset = ntohl(*p++); @@ -272,8 +265,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, */ v=0; while (len > 0) { - pn = rqstp->rq_resused++; - rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; len -= rqstp->rq_vec[v].iov_len; v++; @@ -289,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, unsigned int len, hdr, dlen; int v; - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; p++; /* beginoffset */ @@ -363,9 +358,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, int nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -399,14 +395,15 @@ int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { - if (!(p = decode_fh(p, &args->fh))) + p = decode_fh(p, &args->fh); + if (!p) return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); if (args->count > PAGE_SIZE) args->count = PAGE_SIZE; - args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]); + args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); } @@ -461,7 +458,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p, *p++ = htonl(resp->count); xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data aswell */ + /* now update rqstp->rq_res to reflect data as well */ rqstp->rq_res.page_len = resp->count; if (resp->count & 3) { /* need to pad the tail */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h new file mode 100644 index 00000000000..374c66283ac --- /dev/null +++ b/fs/nfsd/state.h @@ -0,0 +1,511 @@ +/* + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NFSD4_STATE_H +#define _NFSD4_STATE_H + +#include <linux/idr.h> +#include <linux/sunrpc/svc_xprt.h> +#include "nfsfh.h" + +typedef struct { + u32 cl_boot; + u32 cl_id; +} clientid_t; + +typedef struct { + clientid_t so_clid; + u32 so_id; +} stateid_opaque_t; + +typedef struct { + u32 si_generation; + stateid_opaque_t si_opaque; +} stateid_t; + +#define STATEID_FMT "(%08x/%08x/%08x/%08x)" +#define STATEID_VAL(s) \ + (s)->si_opaque.so_clid.cl_boot, \ + (s)->si_opaque.so_clid.cl_id, \ + (s)->si_opaque.so_id, \ + (s)->si_generation + +struct nfsd4_callback { + void *cb_op; + struct nfs4_client *cb_clp; + struct list_head cb_per_client; + u32 cb_minorversion; + struct rpc_message cb_msg; + const struct rpc_call_ops *cb_ops; + struct work_struct cb_work; + bool cb_done; +}; + +struct nfs4_stid { +#define NFS4_OPEN_STID 1 +#define NFS4_LOCK_STID 2 +#define NFS4_DELEG_STID 4 +/* For an open stateid kept around *only* to process close replays: */ +#define NFS4_CLOSED_STID 8 +/* For a deleg stateid kept around only to process free_stateid's: */ +#define NFS4_REVOKED_DELEG_STID 16 + unsigned char sc_type; + stateid_t sc_stateid; + struct nfs4_client *sc_client; +}; + +struct nfs4_delegation { + struct nfs4_stid dl_stid; /* must be first field */ + struct list_head dl_perfile; + struct list_head dl_perclnt; + struct list_head dl_recall_lru; /* delegation recalled */ + atomic_t dl_count; /* ref count */ + struct nfs4_file *dl_file; + u32 dl_type; + time_t dl_time; +/* For recall: */ + struct knfsd_fh dl_fh; + int dl_retries; + struct nfsd4_callback dl_recall; +}; + +/* client delegation callback info */ +struct nfs4_cb_conn { + /* SETCLIENTID info */ + struct sockaddr_storage cb_addr; + struct sockaddr_storage cb_saddr; + size_t cb_addrlen; + u32 cb_prog; /* used only in 4.0 case; + per-session otherwise */ + u32 cb_ident; /* minorversion 0 only */ + struct svc_xprt *cb_xprt; /* minorversion 1 only */ +}; + +static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) +{ + return container_of(s, struct nfs4_delegation, dl_stid); +} + +/* Maximum number of slots per session. 160 is useful for long haul TCP */ +#define NFSD_MAX_SLOTS_PER_SESSION 160 +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 16 +/* Maximum session per slot cache size */ +#define NFSD_SLOT_CACHE_SIZE 2048 +/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ +#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 +#define NFSD_MAX_MEM_PER_SESSION \ + (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) + +struct nfsd4_slot { + u32 sl_seqid; + __be32 sl_status; + u32 sl_datalen; + u16 sl_opcnt; +#define NFSD4_SLOT_INUSE (1 << 0) +#define NFSD4_SLOT_CACHETHIS (1 << 1) +#define NFSD4_SLOT_INITIALIZED (1 << 2) + u8 sl_flags; + char sl_data[]; +}; + +struct nfsd4_channel_attrs { + u32 headerpadsz; + u32 maxreq_sz; + u32 maxresp_sz; + u32 maxresp_cached; + u32 maxops; + u32 maxreqs; + u32 nr_rdma_attrs; + u32 rdma_attrs; +}; + +struct nfsd4_cb_sec { + u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ + kuid_t uid; + kgid_t gid; +}; + +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + struct nfsd4_cb_sec cb_sec; +}; + +struct nfsd4_backchannel_ctl { + u32 bc_cb_program; + struct nfsd4_cb_sec bc_cb_sec; +}; + +struct nfsd4_bind_conn_to_session { + struct nfs4_sessionid sessionid; + u32 dir; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + +struct nfsd4_conn { + struct list_head cn_persession; + struct svc_xprt *cn_xprt; + struct svc_xpt_user cn_xpt_user; + struct nfsd4_session *cn_session; +/* CDFC4_FORE, CDFC4_BACK: */ + unsigned char cn_flags; +}; + +struct nfsd4_session { + atomic_t se_ref; + struct list_head se_hash; /* hash by sessionid */ + struct list_head se_perclnt; +/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ +#define NFS4_SESSION_DEAD 0x010 + u32 se_flags; + struct nfs4_client *se_client; + struct nfs4_sessionid se_sessionid; + struct nfsd4_channel_attrs se_fchannel; + struct nfsd4_channel_attrs se_bchannel; + struct nfsd4_cb_sec se_cb_sec; + struct list_head se_conns; + u32 se_cb_prog; + u32 se_cb_seq_nr; + struct nfsd4_slot *se_slots[]; /* forward channel slots */ +}; + +extern void nfsd4_put_session(struct nfsd4_session *ses); + +/* formatted contents of nfs4_sessionid */ +struct nfsd4_sessionid { + clientid_t clientid; + u32 sequence; + u32 reserved; +}; + +#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ + +/* + * struct nfs4_client - one per client. Clientids live here. + * o Each nfs4_client is hashed by clientid. + * + * o Each nfs4_clients is also hashed by name + * (the opaque quantity initially sent by the client to identify itself). + * + * o cl_perclient list is used to ensure no dangling stateowner references + * when we expire the nfs4_client + */ +struct nfs4_client { + struct list_head cl_idhash; /* hash by cl_clientid.id */ + struct rb_node cl_namenode; /* link into by-name trees */ + struct list_head cl_openowners; + struct idr cl_stateids; /* stateid lookup */ + struct list_head cl_delegations; + struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ + struct list_head cl_lru; /* tail queue */ + struct xdr_netobj cl_name; /* id generated by client */ + nfs4_verifier cl_verifier; /* generated by client */ + time_t cl_time; /* time of last lease renewal */ + struct sockaddr_storage cl_addr; /* client ipaddress */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ + struct svc_cred cl_cred; /* setclientid principal */ + clientid_t cl_clientid; /* generated by server */ + nfs4_verifier cl_confirm; /* generated by server */ + u32 cl_minorversion; + + /* for v4.0 and v4.1 callbacks: */ + struct nfs4_cb_conn cl_cb_conn; +#define NFSD4_CLIENT_CB_UPDATE (0) +#define NFSD4_CLIENT_CB_KILL (1) +#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ +#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ +#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ +#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ + 1 << NFSD4_CLIENT_CB_KILL) + unsigned long cl_flags; + struct rpc_cred *cl_cb_cred; + struct rpc_clnt *cl_cb_client; + u32 cl_cb_ident; +#define NFSD4_CB_UP 0 +#define NFSD4_CB_UNKNOWN 1 +#define NFSD4_CB_DOWN 2 +#define NFSD4_CB_FAULT 3 + int cl_cb_state; + struct nfsd4_callback cl_cb_null; + struct nfsd4_session *cl_cb_session; + struct list_head cl_callbacks; /* list of in-progress callbacks */ + + /* for all client information that callback code might need: */ + spinlock_t cl_lock; + + /* for nfs41 */ + struct list_head cl_sessions; + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ + u32 cl_exchange_flags; + /* number of rpc's in progress over an associated session: */ + atomic_t cl_refcount; + + /* for nfs41 callbacks */ + /* We currently support a single back channel with a single slot */ + unsigned long cl_cb_slot_busy; + struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ + /* wait here for slots */ + struct net *net; +}; + +/* struct nfs4_client_reset + * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl + * upon lease reset, or from upcall to state_daemon (to read in state + * from non-volitile storage) upon reboot. + */ +struct nfs4_client_reclaim { + struct list_head cr_strhash; /* hash by cr_name */ + struct nfs4_client *cr_clp; /* pointer to associated clp */ + char cr_recdir[HEXDIR_LEN]; /* recover dir */ +}; + +static inline void +update_stateid(stateid_t *stateid) +{ + stateid->si_generation++; + /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ + if (stateid->si_generation == 0) + stateid->si_generation = 1; +} + +/* A reasonable value for REPLAY_ISIZE was estimated as follows: + * The OPEN response, typically the largest, requires + * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) + + * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + + * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes + */ + +#define NFSD4_REPLAY_ISIZE 112 + +/* + * Replay buffer, where the result of the last seqid-mutating operation + * is cached. + */ +struct nfs4_replay { + __be32 rp_status; + unsigned int rp_buflen; + char *rp_buf; + struct knfsd_fh rp_openfh; + char rp_ibuf[NFSD4_REPLAY_ISIZE]; +}; + +struct nfs4_stateowner { + struct list_head so_strhash; /* hash by op_name */ + struct list_head so_stateids; + struct nfs4_client * so_client; + /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + * sequence id expected from the client: */ + u32 so_seqid; + struct xdr_netobj so_owner; /* open owner name */ + struct nfs4_replay so_replay; + bool so_is_open_owner; +}; + +struct nfs4_openowner { + struct nfs4_stateowner oo_owner; /* must be first field */ + struct list_head oo_perclient; + /* + * We keep around openowners a little while after last close, + * which saves clients from having to confirm, and allows us to + * handle close replays if they come soon enough. The close_lru + * is a list of such openowners, to be reaped by the laundromat + * thread eventually if they remain unused: + */ + struct list_head oo_close_lru; + struct nfs4_ol_stateid *oo_last_closed_stid; + time_t oo_time; /* time of placement on so_close_lru */ +#define NFS4_OO_CONFIRMED 1 +#define NFS4_OO_NEW 4 + unsigned char oo_flags; +}; + +struct nfs4_lockowner { + struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_owner_ino_hash; /* hash by owner,file */ + struct list_head lo_perstateid; + struct list_head lo_list; /* for temporary uses */ +}; + +static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) +{ + return container_of(so, struct nfs4_openowner, oo_owner); +} + +static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) +{ + return container_of(so, struct nfs4_lockowner, lo_owner); +} + +/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ +struct nfs4_file { + atomic_t fi_ref; + struct hlist_node fi_hash; /* hash by "struct inode *" */ + struct list_head fi_stateids; + struct list_head fi_delegations; + /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ + struct file * fi_fds[3]; + /* + * Each open or lock stateid contributes 0-4 to the counts + * below depending on which bits are set in st_access_bitmap: + * 1 to fi_access[O_RDONLY] if NFS4_SHARE_ACCES_READ is set + * + 1 to fi_access[O_WRONLY] if NFS4_SHARE_ACCESS_WRITE is set + * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. + */ + atomic_t fi_access[2]; + struct file *fi_deleg_file; + struct file_lock *fi_lease; + atomic_t fi_delegees; + struct inode *fi_inode; + bool fi_had_conflict; +}; + +/* XXX: for first cut may fall back on returning file that doesn't work + * at all? */ +static inline struct file *find_writeable_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_WRONLY]) + return f->fi_fds[O_WRONLY]; + return f->fi_fds[O_RDWR]; +} + +static inline struct file *find_readable_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_RDONLY]) + return f->fi_fds[O_RDONLY]; + return f->fi_fds[O_RDWR]; +} + +static inline struct file *find_any_file(struct nfs4_file *f) +{ + if (f->fi_fds[O_RDWR]) + return f->fi_fds[O_RDWR]; + else if (f->fi_fds[O_WRONLY]) + return f->fi_fds[O_WRONLY]; + else + return f->fi_fds[O_RDONLY]; +} + +/* "ol" stands for "Open or Lock". Better suggestions welcome. */ +struct nfs4_ol_stateid { + struct nfs4_stid st_stid; /* must be first field */ + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_lockowners; + struct nfs4_stateowner * st_stateowner; + struct nfs4_file * st_file; + unsigned long st_access_bmap; + unsigned long st_deny_bmap; + struct nfs4_ol_stateid * st_openstp; +}; + +static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) +{ + return container_of(s, struct nfs4_ol_stateid, st_stid); +} + +/* flags for preprocess_seqid_op() */ +#define RD_STATE 0x00000010 +#define WR_STATE 0x00000020 + +struct nfsd4_compound_state; +struct nfsd_net; + +extern __be32 nfs4_preprocess_stateid_op(struct net *net, + struct nfsd4_compound_state *cstate, + stateid_t *stateid, int flags, struct file **filp); +extern void nfs4_lock_state(void); +extern void nfs4_unlock_state(void); +void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); +extern void nfs4_release_reclaim(struct nfsd_net *); +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, + struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); +extern int set_callback_cred(void); +extern void nfsd4_init_callback(struct nfsd4_callback *); +extern void nfsd4_probe_callback(struct nfs4_client *clp); +extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); +extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); +extern void nfsd4_cb_recall(struct nfs4_delegation *dp); +extern int nfsd4_create_callback_queue(void); +extern void nfsd4_destroy_callback_queue(void); +extern void nfsd4_shutdown_callback(struct nfs4_client *); +extern void nfs4_put_delegation(struct nfs4_delegation *dp); +extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, + struct nfsd_net *nn); +extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); +extern void put_client_renew(struct nfs4_client *clp); + +/* nfs4recover operations */ +extern int nfsd4_client_tracking_init(struct net *net); +extern void nfsd4_client_tracking_exit(struct net *net); +extern void nfsd4_client_record_create(struct nfs4_client *clp); +extern void nfsd4_client_record_remove(struct nfs4_client *clp); +extern int nfsd4_client_record_check(struct nfs4_client *clp); +extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); + +/* nfs fault injection functions */ +#ifdef CONFIG_NFSD_FAULT_INJECTION +int nfsd_fault_inject_init(void); +void nfsd_fault_inject_cleanup(void); +u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); +struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); + +u64 nfsd_forget_client(struct nfs4_client *, u64); +u64 nfsd_forget_client_locks(struct nfs4_client*, u64); +u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); +u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); +u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_print_client(struct nfs4_client *, u64); +u64 nfsd_print_client_locks(struct nfs4_client *, u64); +u64 nfsd_print_client_openowners(struct nfs4_client *, u64); +u64 nfsd_print_client_delegations(struct nfs4_client *, u64); +#else /* CONFIG_NFSD_FAULT_INJECTION */ +static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_cleanup(void) {} +#endif /* CONFIG_NFSD_FAULT_INJECTION */ + +#endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 71944cddf68..cd90878a76a 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -1,6 +1,4 @@ /* - * linux/fs/nfsd/stats.c - * * procfs-based user access to knfsd statistics * * /proc/net/rpc/nfsd @@ -10,7 +8,7 @@ * Statistsics for the reply cache * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> * statistics for filehandle lookup - * io <bytes-read> <bytes-writtten> + * io <bytes-read> <bytes-written> * statistics for IO throughput * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> * time (seconds) when nfsd thread usage above thresholds @@ -23,17 +21,12 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/kernel.h> -#include <linux/time.h> -#include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/stat.h> #include <linux/module.h> - -#include <linux/sunrpc/svc.h> #include <linux/sunrpc/stats.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/stats.h> +#include <net/net_namespace.h> + +#include "nfsd.h" struct nfsd_stats nfsdstats; struct svc_stat nfsd_svcstats = { @@ -101,11 +94,11 @@ static const struct file_operations nfsd_proc_fops = { void nfsd_stat_init(void) { - svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); + svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); } void nfsd_stat_shutdown(void) { - svc_proc_unregister("nfsd"); + svc_proc_unregister(&init_net, "nfsd"); } diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h new file mode 100644 index 00000000000..a5c944b771c --- /dev/null +++ b/fs/nfsd/stats.h @@ -0,0 +1,43 @@ +/* + * Statistics for NFS server. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ +#ifndef _NFSD_STATS_H +#define _NFSD_STATS_H + +#include <uapi/linux/nfsd/stats.h> + + +struct nfsd_stats { + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif + +}; + + +extern struct nfsd_stats nfsdstats; +extern struct svc_stat nfsd_svcstats; + +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +#endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 46f59d5365a..140c496f612 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1,7 +1,4 @@ -#define MSNFS /* HACK HACK */ /* - * linux/fs/nfsd/vfs.c - * * File operations used by nfsd. Some of these have been ripped from * other parts of the kernel because they weren't exported, others * are partial duplicates with added or changed functionality. @@ -16,47 +13,34 @@ * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> */ -#include <linux/string.h> -#include <linux/time.h> -#include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> -#include <linux/mount.h> -#include <linux/major.h> #include <linux/splice.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> #include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <linux/in.h> -#include <linux/module.h> #include <linux/namei.h> -#include <linux/vfs.h> #include <linux/delay.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#ifdef CONFIG_NFSD_V3 -#include <linux/nfs3.h> -#include <linux/nfsd/xdr3.h> -#endif /* CONFIG_NFSD_V3 */ -#include <linux/nfsd/nfsfh.h> -#include <linux/quotaops.h> #include <linux/fsnotify.h> -#include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> -#ifdef CONFIG_NFSD_V4 -#include <linux/nfs4.h> -#include <linux/nfs4_acl.h> -#include <linux/nfsd_idmap.h> +#include <linux/jhash.h> +#include <linux/ima.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <linux/exportfs.h> +#include <linux/writeback.h> #include <linux/security.h> + +#ifdef CONFIG_NFSD_V3 +#include "xdr3.h" +#endif /* CONFIG_NFSD_V3 */ + +#ifdef CONFIG_NFSD_V4 +#include "acl.h" +#include "idmap.h" #endif /* CONFIG_NFSD_V4 */ -#include <linux/jhash.h> -#include <asm/uaccess.h> +#include "nfsd.h" +#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_FILEOP @@ -83,7 +67,6 @@ struct raparm_hbucket { spinlock_t pb_lock; } ____cacheline_aligned_in_smp; -static struct raparms * raparml; #define RAPARM_HASH_BITS 4 #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) @@ -101,35 +84,98 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - struct dentry *mounts = dget(dentry); + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dentry)}; int err = 0; - while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + err = follow_down(&path); + if (err < 0) + goto out; - exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); + exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { - if (PTR_ERR(exp2) != -ENOENT) - err = PTR_ERR(exp2); - dput(mounts); - mntput(mnt); + err = PTR_ERR(exp2); + /* + * We normally allow NFS clients to continue + * "underneath" a mountpoint that is not exported. + * The exception is V4ROOT, where no traversal is ever + * allowed without an explicit export of the new + * directory. + */ + if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT)) + err = 0; + path_put(&path); goto out; } - if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { + if (nfsd_v4client(rqstp) || + (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ - exp_put(exp); + /* + * This is subtle: path.dentry is *not* on path.mnt + * at this point. The only reason we are safe is that + * original mnt is pinned down by exp, so we should + * put path *before* putting exp + */ + *dpp = path.dentry; + path.dentry = dentry; *expp = exp2; - dput(dentry); - *dpp = mounts; - } else { - exp_put(exp2); - dput(mounts); + exp2 = exp; } - mntput(mnt); + path_put(&path); + exp_put(exp2); out: return err; } +static void follow_to_parent(struct path *path) +{ + struct dentry *dp; + + while (path->dentry == path->mnt->mnt_root && follow_up(path)) + ; + dp = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = dp; +} + +static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp) +{ + struct svc_export *exp2; + struct path path = {.mnt = mntget((*exp)->ex_path.mnt), + .dentry = dget(dparent)}; + + follow_to_parent(&path); + + exp2 = rqst_exp_parent(rqstp, &path); + if (PTR_ERR(exp2) == -ENOENT) { + *dentryp = dget(dparent); + } else if (IS_ERR(exp2)) { + path_put(&path); + return PTR_ERR(exp2); + } else { + *dentryp = dget(path.dentry); + exp_put(*exp); + *exp = exp2; + } + path_put(&path); + return 0; +} + +/* + * For nfsd purposes, we treat V4ROOT exports as though there was an + * export at *every* directory. + */ +int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) +{ + if (d_mountpoint(dentry)) + return 1; + if (nfsd4_is_junction(dentry)) + return 1; + if (!(exp->ex_flags & NFSEXP_V4ROOT)) + return 0; + return dentry->d_inode != NULL; +} + __be32 nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, unsigned int len, @@ -138,16 +184,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -158,37 +198,21 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = dget(dparent); else if (dparent != exp->ex_path.dentry) dentry = dget_parent(dparent); - else if (!EX_NOHIDE(exp)) + else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp)) dentry = dget(dparent); /* .. == . just like at / */ else { /* checking mountpoint crossing is very different when stepping up */ - struct svc_export *exp2 = NULL; - struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - dentry = dget(dparent); - while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) - ; - dp = dget_parent(dentry); - dput(dentry); - dentry = dp; - - exp2 = rqst_exp_parent(rqstp, mnt, dentry); - if (PTR_ERR(exp2) == -ENOENT) { - dput(dentry); - dentry = dget(dparent); - } else if (IS_ERR(exp2)) { - host_err = PTR_ERR(exp2); - dput(dentry); - mntput(mnt); + host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry); + if (host_err) goto out_nfserr; - } else { - exp_put(exp); - exp = exp2; - } - mntput(mnt); } } else { - fh_lock(fhp); + /* + * In the nfsd4_open() case, this may be held across + * subsequent open and delegation acquisition which may + * need to take the child's i_mutex: + */ + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = lookup_one_len(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -196,7 +220,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, /* * check if we have crossed a mount point ... */ - if (d_mountpoint(dentry)) { + if (nfsd_mountpoint(dentry, exp)) { if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { dput(dentry); goto out_nfserr; @@ -232,6 +256,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; @@ -251,44 +278,30 @@ out: return err; } - /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Commit metadata changes to stable storage. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static int +commit_metadata(struct svc_fh *fhp) { - struct dentry *dentry; - struct inode *inode; - int accmode = MAY_SATTR; - int ftype = 0; - int imode; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + struct inode *inode = fhp->fh_dentry->d_inode; + const struct export_operations *export_ops = inode->i_sb->s_export_op; - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; + if (!EX_ISSYNC(fhp->fh_export)) + return 0; - if (!iap->ia_valid) - goto out; + if (export_ops->commit_metadata) + return export_ops->commit_metadata(inode); + return sync_inode_metadata(inode, 1); +} +/* + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. + */ +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) +{ /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -298,8 +311,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -325,235 +337,194 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - /* - * If we are changing the size of the file, then - * we need to break all leases. - */ - host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); - if (host_err == -EWOULDBLOCK) - host_err = -ETIMEDOUT; - if (host_err) /* ENOMEM or EWOULDBLOCK */ - goto out_nfserr; - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - DQUOT_INIT(inode); - } - imode = inode->i_mode; + /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; - imode = iap->ia_mode |= (imode & ~S_IALLUGO); - /* if changing uid/gid revoke setuid/setgid in mode */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) { - iap->ia_valid |= ATTR_KILL_PRIV; + iap->ia_mode |= (inode->i_mode & ~S_IALLUGO); + } + + /* Revoke setuid/setgid on chown */ + if (!S_ISDIR(inode->i_mode) && + ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) { + iap->ia_valid |= ATTR_KILL_PRIV; + if (iap->ia_valid & ATTR_MODE) { + /* we're setting mode too, just clear the s*id bits */ iap->ia_mode &= ~S_ISUID; + if (iap->ia_mode & S_IXGRP) + iap->ia_mode &= ~S_ISGID; + } else { + /* set ATTR_KILL_* bits and let VFS handle it */ + iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_mode &= ~S_ISGID; - } else { - /* - * Revoke setuid/setgid bit on chown/chgrp - */ - if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) - iap->ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV; - if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) - iap->ia_valid |= ATTR_KILL_SGID; } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - fh_lock(fhp); - host_err = notify_change(dentry, iap); - err = nfserrno(host_err); - fh_unlock(fhp); + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; } - if (size_change) - put_write_access(inode); - if (!err) - if (EX_ISSYNC(fhp->fh_export)) - write_inode_now(inode, 1); -out: - return err; -out_nfserr: - err = nfserrno(host_err); - goto out; + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); } -#if defined(CONFIG_NFSD_V2_ACL) || \ - defined(CONFIG_NFSD_V3_ACL) || \ - defined(CONFIG_NFSD_V4) -static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) { - ssize_t buflen; + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + bool get_write_count; + int size_change = 0; - buflen = vfs_getxattr(dentry, key, NULL, 0); - if (buflen <= 0) - return buflen; + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; - *buf = kmalloc(buflen, GFP_KERNEL); - if (!*buf) - return -ENOMEM; + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; - return vfs_getxattr(dentry, key, *buf, buflen); -} -#endif - -#if defined(CONFIG_NFSD_V4) -static int -set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) -{ - int len; - size_t buflen; - char *buf = NULL; - int error = 0; - - buflen = posix_acl_xattr_size(pacl->a_count); - buf = kmalloc(buflen, GFP_KERNEL); - error = -ENOMEM; - if (buf == NULL) + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) goto out; + if (get_write_count) { + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + } - len = posix_acl_to_xattr(pacl, buf, buflen); - if (len < 0) { - error = len; + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; + } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; } - error = vfs_setxattr(dentry, key, buf, len, 0); + fh_lock(fhp); + host_err = notify_change(dentry, iap, NULL); + fh_unlock(fhp); + err = nfserrno(host_err); + +out_put_write_access: + if (size_change) + put_write_access(inode); + if (!err) + commit_metadata(fhp); out: - kfree(buf); - return error; + return err; } -__be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +#if defined(CONFIG_NFSD_V4) +/* + * NFS junction information is stored in an extended attribute. + */ +#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs" + +/** + * nfsd4_is_junction - Test if an object could be an NFS junction + * + * @dentry: object to test + * + * Returns 1 if "dentry" appears to contain NFS junction information. + * Otherwise 0 is returned. + */ +int nfsd4_is_junction(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + if (inode == NULL) + return 0; + if (inode->i_mode & S_IXUGO) + return 0; + if (!(inode->i_mode & S_ISVTX)) + return 0; + if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) + return 0; + return 1; +} +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) { __be32 error; int host_error; struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; - /* Get inode */ - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); if (error) return error; dentry = fhp->fh_dentry; - inode = dentry->d_inode; - if (S_ISDIR(inode->i_mode)) - flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); - if (host_error == -EINVAL) { - return nfserr_attrnotsupp; - } else if (host_error < 0) - goto out_nfserr; - - host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); - if (host_error < 0) - goto out_release; - - if (S_ISDIR(inode->i_mode)) - host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); - -out_release: - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; - else - return nfserrno(host_error); -} - -static struct posix_acl * -_get_posix_acl(struct dentry *dentry, char *key) -{ - void *buf = NULL; - struct posix_acl *pacl = NULL; - int buflen; - - buflen = nfsd_getxattr(dentry, key, &buf); - if (!buflen) - buflen = -ENODATA; - if (buflen <= 0) - return ERR_PTR(buflen); - - pacl = posix_acl_from_xattr(buf, buflen); - kfree(buf); - return pacl; + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); } - -int -nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) { - struct inode *inode = dentry->d_inode; - int error = 0; - struct posix_acl *pacl = NULL, *dpacl = NULL; - unsigned int flags = 0; - - pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); - if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) - pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) { - error = PTR_ERR(pacl); - pacl = NULL; - goto out; - } - - if (S_ISDIR(inode->i_mode)) { - dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); - if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) - dpacl = NULL; - else if (IS_ERR(dpacl)) { - error = PTR_ERR(dpacl); - dpacl = NULL; - goto out; - } - flags = NFS4_ACL_DIR; - } - - *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); - if (IS_ERR(*acl)) { - error = PTR_ERR(*acl); - *acl = NULL; - } - out: - posix_acl_release(pacl); - posix_acl_release(dpacl); - return error; + return nfserr_notsupp; } +#endif -#endif /* defined(CONFIG_NFS_V4) */ +#endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 /* @@ -564,20 +535,20 @@ struct accessmap { int how; }; static struct accessmap nfs3_regaccess[] = { - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_EXECUTE, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, - { NFS3_ACCESS_EXTEND, MAY_WRITE }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, { 0, 0 } }; static struct accessmap nfs3_diraccess[] = { - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_LOOKUP, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, - { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, - { NFS3_ACCESS_DELETE, MAY_REMOVE }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC}, + { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, + { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, { 0, 0 } }; @@ -590,10 +561,10 @@ static struct accessmap nfs3_anyaccess[] = { * mainly at mode bits, and we make sure to ignore read-only * filesystem checks */ - { NFS3_ACCESS_READ, MAY_READ }, - { NFS3_ACCESS_EXECUTE, MAY_EXEC }, - { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, - { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, + { NFS3_ACCESS_READ, NFSD_MAY_READ }, + { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC }, + { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, + { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS }, { 0, 0 } }; @@ -607,7 +578,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor u32 query, result = 0, sresult = 0; __be32 error; - error = fh_verify(rqstp, fhp, 0, MAY_NOP); + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (error) goto out; @@ -657,40 +628,60 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor } #endif /* CONFIG_NFSD_V3 */ +static int nfsd_open_break_lease(struct inode *inode, int access) +{ + unsigned int mode; + if (access & NFSD_MAY_NOT_BREAK_LEASE) + return 0; + mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY; + return break_lease(inode, mode | O_NONBLOCK); +} /* * Open an existing file or directory. - * The access argument indicates the type of open (read/write/lock) + * The may_flags argument indicates the type of open (read/write/lock) + * and additional flags. * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, - int access, struct file **filp) +nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, + int may_flags, struct file **filp) { - struct dentry *dentry; + struct path path; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; __be32 err; - int host_err; + int host_err = 0; + + validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. + * + * Arguably we should also allow the owner override for + * directories, but we never have and it doesn't seem to have + * caused anyone a problem. If we were to change this, note + * also that our filldir callbacks would need a variant of + * lookup_one_len that doesn't check permissions. */ - err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + if (type == S_IFREG) + may_flags |= NFSD_MAY_OWNER_OVERRIDE; + err = fh_verify(rqstp, fhp, type, may_flags); if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; /* Disallow write access to files with the append-only bit set * or any access when mandatory locking enabled */ err = nfserr_perm; - if (IS_APPEND(inode) && (access & MAY_WRITE)) + if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; /* * We must ignore files (but only files) which might have mandatory @@ -703,31 +694,33 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!inode->i_fop) goto out; - /* - * Check to see if there are any leases on this file. - * This may block while leases are broken. - */ - host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); - if (host_err == -EWOULDBLOCK) - host_err = -ETIMEDOUT; + host_err = nfsd_open_break_lease(inode, may_flags); if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; - if (access & MAY_WRITE) { - if (access & MAY_READ) + if (may_flags & NFSD_MAY_WRITE) { + if (may_flags & NFSD_MAY_READ) flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; - - DQUOT_INIT(inode); } - *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags); - if (IS_ERR(*filp)) + *filp = dentry_open(&path, flags, current_cred()); + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); + *filp = NULL; + } else { + host_err = ima_file_check(*filp, may_flags); + + if (may_flags & NFSD_MAY_64BIT_COOKIE) + (*filp)->f_mode |= FMODE_64BITHASH; + else + (*filp)->f_mode |= FMODE_32BITHASH; + } + out_nfserr: err = nfserrno(host_err); out: + validate_process_creds(); return err; } @@ -741,47 +734,6 @@ nfsd_close(struct file *filp) } /* - * Sync a file - * As this calls fsync (not fdatasync) there is no need for a write_inode - * after it. - */ -static inline int nfsd_dosync(struct file *filp, struct dentry *dp, - const struct file_operations *fop) -{ - struct inode *inode = dp->d_inode; - int (*fsync) (struct file *, struct dentry *, int); - int err; - - err = filemap_fdatawrite(inode->i_mapping); - if (err == 0 && fop && (fsync = fop->fsync)) - err = fsync(filp, dp, 0); - if (err == 0) - err = filemap_fdatawait(inode->i_mapping); - - return err; -} - - -static int -nfsd_sync(struct file *filp) -{ - int err; - struct inode *inode = filp->f_path.dentry->d_inode; - dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name); - mutex_lock(&inode->i_mutex); - err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op); - mutex_unlock(&inode->i_mutex); - - return err; -} - -int -nfsd_sync_dir(struct dentry *dp) -{ - return nfsd_dosync(NULL, dp, dp->d_inode->i_fop); -} - -/* * Obtain the readahead parameters for the file * specified by (dev, ino). */ @@ -805,7 +757,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) if (ra->p_count == 0) frap = rap; } - depth = nfsdstats.ra_size*11/10; + depth = nfsdstats.ra_size; if (!frap) { spin_unlock(&rab->pb_lock); return NULL; @@ -838,30 +790,23 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_respages + rqstp->rq_resused; + struct page **pp = rqstp->rq_next_page; struct page *page = buf->page; size_t size; - int ret; - - ret = buf->ops->confirm(pipe, buf); - if (unlikely(ret)) - return ret; size = sd->len; if (rqstp->rq_res.page_len == 0) { get_page(page); - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_base = buf->offset; rqstp->rq_res.page_len = size; } else if (page != pp[-1]) { get_page(page); - if (*pp) - put_page(*pp); - *pp = page; - rqstp->rq_resused++; + if (*rqstp->rq_next_page) + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; rqstp->rq_res.page_len += size; } else rqstp->rq_res.page_len += size; @@ -875,89 +820,97 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -static inline int svc_msnfs(struct svc_fh *ffhp) +__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { -#ifdef MSNFS - return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS); -#else - return 0; -#endif + if (host_err >= 0) { + nfsdstats.io_read += host_err; + *count = host_err; + fsnotify_access(file); + return 0; + } else + return nfserrno(host_err); } -static __be32 -nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +int nfsd_splice_read(struct svc_rqst *rqstp, + struct file *file, loff_t offset, unsigned long *count) { - struct inode *inode; - struct raparms *ra; - mm_segment_t oldfs; - __be32 err; - int host_err; - - err = nfserr_perm; - inode = file->f_path.dentry->d_inode; + struct splice_desc sd = { + .len = 0, + .total_len = *count, + .pos = offset, + .u.data = rqstp, + }; + int host_err; + + rqstp->rq_next_page = rqstp->rq_respages + 1; + host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); + return nfsd_finish_read(file, count, host_err); +} - if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) - goto out; +int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + mm_segment_t oldfs; + int host_err; - /* Get readahead parameters */ - ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); + oldfs = get_fs(); + set_fs(KERNEL_DS); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + return nfsd_finish_read(file, count, host_err); +} - if (ra && ra->p_set) - file->f_ra = ra->p_ra; +static __be32 +nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + if (file->f_op->splice_read && rqstp->rq_splice_ok) + return nfsd_splice_read(rqstp, file, offset, count); + else + return nfsd_readv(file, offset, vec, vlen, count); +} - if (file->f_op->splice_read && rqstp->rq_splice_ok) { - struct splice_desc sd = { - .len = 0, - .total_len = *count, - .pos = offset, - .u.data = rqstp, - }; +/* + * Gathered writes: If another process is currently writing to the file, + * there's a high chance this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the file with each write + * request, we sleep for 10 msec. + * + * I don't know if this roughly approximates C. Juszak's idea of + * gathered writes, but it's a nice and simple solution (IMHO), and it + * seems to work:-) + * + * Note: we do this only in the NFSv2 case, since v3 and higher have a + * better tool (separate unstable writes and commits) for solving this + * problem. + */ +static int wait_for_concurrent_writes(struct file *file) +{ + struct inode *inode = file_inode(file); + static ino_t last_ino; + static dev_t last_dev; + int err = 0; - rqstp->rq_resused = 1; - host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor); - } else { - oldfs = get_fs(); - set_fs(KERNEL_DS); - host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); - set_fs(oldfs); + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); } - /* Write back readahead params */ - if (ra) { - struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; - spin_lock(&rab->pb_lock); - ra->p_ra = file->f_ra; - ra->p_set = 1; - ra->p_count--; - spin_unlock(&rab->pb_lock); + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + err = vfs_fsync(file, 0); } - - if (host_err >= 0) { - nfsdstats.io_read += host_err; - *count = host_err; - err = 0; - fsnotify_access(file->f_path.dentry); - } else - err = nfserrno(host_err); -out: + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; return err; } -static void kill_suid(struct dentry *dentry) -{ - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; - - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &ia); - mutex_unlock(&dentry->d_inode->i_mutex); -} - static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long cnt, int *stablep) + unsigned long *cnt, int *stablep) { struct svc_export *exp; struct dentry *dentry; @@ -966,122 +919,110 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, __be32 err = 0; int host_err; int stable = *stablep; + int use_wgather; + loff_t pos = offset; + unsigned int pflags = current->flags; -#ifdef MSNFS - err = nfserr_perm; - - if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) - goto out; -#endif + if (rqstp->rq_local) + /* + * We want less throttling in balance_dirty_pages() + * and shrink_inactive_list() so that nfs to + * localhost doesn't cause nfsd to lock up due to all + * the client's dirty pages or its congested queue. + */ + current->flags |= PF_LESS_THROTTLE; dentry = file->f_path.dentry; inode = dentry->d_inode; exp = fhp->fh_export; - /* - * Request sync writes if - * - the sync export option has been set, or - * - the client requested O_SYNC behavior (NFSv3 feature). - * - The file system doesn't support fsync(). - * When gathered writes have been configured for this volume, - * flushing the data to disk is handled separately below. - */ - - if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ - stable = 2; - *stablep = 2; /* FILE_SYNC */ - } + use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) - file->f_flags |= O_SYNC; /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); set_fs(oldfs); - if (host_err >= 0) { - nfsdstats.io_write += cnt; - fsnotify_modify(file->f_path.dentry); - } - - /* clear setuid/setgid flag after write */ - if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) - kill_suid(dentry); - - if (host_err >= 0 && stable) { - static ino_t last_ino; - static dev_t last_dev; - - /* - * Gathered writes: If another process is currently - * writing to the file, there's a high chance - * this is another nfsd (triggered by a bulk write - * from a client's biod). Rather than syncing the - * file with each write request, we sleep for 10 msec. - * - * I don't know if this roughly approximates - * C. Juszak's idea of gathered writes, but it's a - * nice and simple solution (IMHO), and it seems to - * work:-) - */ - if (EX_WGATHER(exp)) { - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } + if (host_err < 0) + goto out_nfserr; + *cnt = host_err; + nfsdstats.io_write += host_err; + fsnotify_modify(file); - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } -#if 0 - wake_up(&inode->i_wait); -#endif - } - last_ino = inode->i_ino; - last_dev = inode->i_sb->s_dev; + if (stable) { + if (use_wgather) + host_err = wait_for_concurrent_writes(file); + else + host_err = vfs_fsync_range(file, offset, offset+*cnt, 0); } +out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) err = 0; - else + else err = nfserrno(host_err); -out: + if (rqstp->rq_local) + tsk_restore_flags(current, pflags, PF_LESS_THROTTLE); return err; } +__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct file **file, struct raparms **ra) +{ + struct inode *inode; + __be32 err; + + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file); + if (err) + return err; + + inode = file_inode(*file); + + /* Get readahead parameters */ + *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); + + if (*ra && (*ra)->p_set) + (*file)->f_ra = (*ra)->p_ra; + return nfs_ok; +} + +void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra) +{ + /* Write back readahead params */ + if (ra) { + struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; + spin_lock(&rab->pb_lock); + ra->p_ra = file->f_ra; + ra->p_set = 1; + ra->p_count--; + spin_unlock(&rab->pb_lock); + } + nfsd_close(file); +} + /* * Read data from a file. count must contain the requested read count * on entry. On return, *count contains the number of bytes actually read. * N.B. After this call fhp needs an fh_put */ -__be32 -nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *count) +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - __be32 err; + struct file *file; + struct raparms *ra; + __be32 err; + + err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra); + if (err) + return err; + + err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count); + + nfsd_put_tmp_read_open(file, ra); - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - MAY_READ|MAY_OWNER_OVERRIDE); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - } else { - err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); - if (err) - goto out; - err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); - nfsd_close(file); - } -out: return err; } @@ -1092,20 +1033,20 @@ out: */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, + loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, int *stablep) { __be32 err = 0; if (file) { err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - MAY_WRITE|MAY_OWNER_OVERRIDE); + NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stablep); } else { - err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); if (err) goto out; @@ -1121,8 +1062,9 @@ out: #ifdef CONFIG_NFSD_V3 /* * Commit all pending writes to stable storage. - * Strictly speaking, we could sync just the indicated file region here, - * but there's currently no way we can ask the VFS to do so. + * + * Note: we only guarantee that data that lies within the range specified + * by the 'offset' and 'count' parameters will be synced. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. @@ -1132,27 +1074,37 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long count) { struct file *file; - __be32 err; + loff_t end = LLONG_MAX; + __be32 err = nfserr_inval; - if ((u64)count > ~(u64)offset) - return nfserr_inval; + if (offset < 0) + goto out; + if (count != 0) { + end = offset + (loff_t)count - 1; + if (end < offset) + goto out; + } - if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) - return err; + err = nfsd_open(rqstp, fhp, S_IFREG, + NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file); + if (err) + goto out; if (EX_ISSYNC(fhp->fh_export)) { - if (file->f_op && file->f_op->fsync) { - err = nfserrno(nfsd_sync(file)); - } else { + int err2 = vfs_fsync_range(file, offset, end, 0); + + if (err2 != -EINVAL) + err = nfserrno(err2); + else err = nfserr_notsupp; - } } nfsd_close(file); +out: return err; } #endif /* CONFIG_NFSD_V3 */ -__be32 +static __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, struct iattr *iap) { @@ -1165,13 +1117,28 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, * send along the gid on create when it tries to implement * setgid directories via NFS: */ - if (current->fsuid != 0) + if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); return 0; } +/* HPUX client sometimes creates a file in mode 000, and sets size to 0. + * setting size to 0 may fail for some specific file systems by the permission + * checking which requires WRITE permission but the mode is 000. + * we ignore the resizing(to 0) on the just new created file, since the size is + * 0 after file created. + * + * call this only after vfs_create() is called. + * */ +static void +nfsd_check_ignore_resizing(struct iattr *iap) +{ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; +} + /* * Create a file (regular, directory, device, fifo); UNIX sockets * not yet implemented. @@ -1198,7 +1165,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1206,13 +1173,17 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, dirp = dentry->d_inode; err = nfserr_notdir; - if(!dirp->i_op || !dirp->i_op->lookup) + if (!dirp->i_op->lookup) goto out; /* * Check whether the response file handle has been verified yet. * If it has, the parent directory should already be locked. */ if (!resfhp->fh_dentry) { + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); @@ -1228,9 +1199,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %s/%s not locked!\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + "nfsd_create: parent %pd2 not locked!\n", + dentry); err = nfserr_io; goto out; } @@ -1240,8 +1210,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %s/%s not negative!\n", - dentry->d_name.name, dchild->d_name.name); + dprintk("nfsd_create: dentry %pd/%pd not negative!\n", + dentry, dchild); goto out; } @@ -1249,13 +1219,23 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode = 0; iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + err = nfserr_inval; + if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) { + printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", + type); + goto out; + } + /* * Get the dir op function pointer. */ err = 0; + host_err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); + if (!host_err) + nfsd_check_ignore_resizing(iap); break; case S_IFDIR: host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); @@ -1266,20 +1246,18 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFSOCK: host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; - default: - printk("nfsd: bad file type %o in nfsd_create\n", type); - host_err = -EINVAL; } if (host_err < 0) goto out_nfserr; - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - write_inode_now(dchild->d_inode, 1); - } - + err = nfsd_create_setattr(rqstp, resfhp, iap); - err2 = nfsd_create_setattr(rqstp, resfhp, iap); + /* + * nfsd_setattr already committed the child. Transactional filesystems + * had a chance to commit changes for both parent and child + * simultaneously making the following commit_metadata a noop. + */ + err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; /* @@ -1298,19 +1276,25 @@ out_nfserr: } #ifdef CONFIG_NFSD_V3 + +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + /* - * NFSv3 version of nfsd_create + * NFSv3 and NFSv4 version of nfsd_create */ __be32 -nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, - int *truncp, int *created) + bool *truncp, bool *created) { struct dentry *dentry, *dchild = NULL; struct inode *dirp; __be32 err; - __be32 err2; int host_err; __u32 v_mtime=0, v_atime=0; @@ -1322,7 +1306,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; if (!(iap->ia_valid & ATTR_MODE)) iap->ia_mode = 0; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); if (err) goto out; @@ -1332,8 +1316,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, /* Get all the sanity checks out of the way before * we lock the parent. */ err = nfserr_notdir; - if(!dirp->i_op || !dirp->i_op->lookup) + if (!dirp->i_op->lookup) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); /* @@ -1344,11 +1333,18 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dchild)) goto out_nfserr; + /* If file doesn't exist, check for permissions to create one */ + if (!dchild->d_inode) { + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (err) + goto out; + } + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); if (err) goto out; - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have * the high bit set, so just clear the high bits. If this is * ever changed to use different attrs for storing the @@ -1365,7 +1361,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, switch (createmode) { case NFS3_CREATE_UNCHECKED: if (! S_ISREG(dchild->d_inode->i_mode)) - err = nfserr_exist; + goto out; else if (truncp) { /* in nfsv4, we need to treat this case a little * differently. we don't want to truncate the @@ -1384,27 +1380,38 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } + case NFS4_CREATE_EXCLUSIVE4_1: + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime + && dchild->d_inode->i_atime.tv_sec == v_atime + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; + goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; } + fh_drop_write(fhp); goto out; } - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); - if (host_err < 0) + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); + if (host_err < 0) { + fh_drop_write(fhp); goto out_nfserr; + } if (created) *created = 1; - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - /* setattr will sync the child (or not) */ - } + nfsd_check_ignore_resizing(iap); - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME | ATTR_MTIME_SET|ATTR_ATIME_SET; @@ -1416,9 +1423,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - err2 = nfsd_create_setattr(rqstp, resfhp, iap); - if (err2) - err = err2; + err = nfsd_create_setattr(rqstp, resfhp, iap); + + /* + * nfsd_setattr already committed the child (and possibly also the parent). + */ + if (!err) + err = nfserrno(commit_metadata(fhp)); /* * Update the filehandle to get the new inode info. @@ -1430,6 +1441,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_unlock(fhp); if (dchild && !IS_ERR(dchild)) dput(dchild); + fh_drop_write(fhp); return err; out_nfserr: @@ -1446,30 +1458,31 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { - struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; + struct path path; - err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); + err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP); if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; err = nfserr_inval; - if (!inode->i_op || !inode->i_op->readlink) + if (!inode->i_op->readlink) goto out; - touch_atime(fhp->fh_export->ex_path.mnt, dentry); + touch_atime(&path); /* N.B. Why does this call need a get_fs()?? * Remove the set_fs and watch the fireworks:-) --okir */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = inode->i_op->readlink(dentry, buf, *lenp); + host_err = inode->i_op->readlink(path.dentry, (char __user *)buf, *lenp); set_fs(oldfs); if (host_err < 0) @@ -1498,7 +1511,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dnew; __be32 err, cerr; int host_err; - umode_t mode; err = nfserr_noent; if (!flen || !plen) @@ -1507,9 +1519,14 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); @@ -1517,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - mode = S_IALLUGO; - /* Only the MODE ATTRibute is even vaguely meaningful */ - if (iap && (iap->ia_valid & ATTR_MODE)) - mode = iap->ia_mode & S_IALLUGO; - if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) @@ -1529,19 +1541,18 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); kfree(path_alloced); } } else - host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); - - if (!host_err) { - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); - } + host_err = vfs_symlink(dentry->d_inode, dnew, path); err = nfserrno(host_err); + if (!err) + err = nfserrno(commit_metadata(fhp)); fh_unlock(fhp); + fh_drop_write(fhp); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; @@ -1562,17 +1573,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp) { struct dentry *ddir, *dnew, *dold; - struct inode *dirp, *dest; + struct inode *dirp; __be32 err; int host_err; - err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; - err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); + err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP); if (err) goto out; - + err = nfserr_isdir; + if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode)) + goto out; err = nfserr_perm; if (!len) goto out; @@ -1580,6 +1593,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + host_err = fh_want_write(tfhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = ddir->d_inode; @@ -1590,25 +1609,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out_nfserr; dold = tfhp->fh_dentry; - dest = dold->d_inode; - host_err = vfs_link(dold, dirp, dnew); + err = nfserr_noent; + if (!dold->d_inode) + goto out_dput; + host_err = vfs_link(dold, dirp, dnew, NULL); if (!host_err) { - if (EX_ISSYNC(ffhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(ddir)); - write_inode_now(dest, 1); - } - err = 0; + err = nfserrno(commit_metadata(ffhp)); + if (!err) + err = nfserrno(commit_metadata(tfhp)); } else { if (host_err == -EXDEV && rqstp->rq_vers == 2) err = nfserr_acces; else err = nfserrno(host_err); } - +out_dput: dput(dnew); out_unlock: fh_unlock(ffhp); + fh_drop_write(tfhp); out: return err; @@ -1630,10 +1650,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, __be32 err; int host_err; - err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; - err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); + err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1643,14 +1663,16 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, tdentry = tfhp->fh_dentry; tdir = tdentry->d_inode; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - if (ffhp->fh_export != tfhp->fh_export) - goto out; - err = nfserr_perm; if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; + host_err = fh_want_write(ffhp); + if (host_err) { + err = nfserrno(host_err); + goto out; + } + /* cannot use fh_lock as we need deadlock protective ordering * so do it by hand */ trap = lock_rename(tdentry, fdentry); @@ -1678,35 +1700,34 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ndentry == trap) goto out_dput_new; -#ifdef MSNFS - if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { - host_err = -EPERM; - } else -#endif - host_err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!host_err && EX_ISSYNC(tfhp->fh_export)) { - host_err = nfsd_sync_dir(tdentry); + host_err = -EXDEV; + if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) + goto out_dput_new; + if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) + goto out_dput_new; + + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); + if (!host_err) { + host_err = commit_metadata(tfhp); if (!host_err) - host_err = nfsd_sync_dir(fdentry); + host_err = commit_metadata(ffhp); } - out_dput_new: dput(ndentry); out_dput_old: dput(odentry); out_nfserr: err = nfserrno(host_err); - - /* we cannot reply on fh_unlock on the two filehandles, + /* + * We cannot rely on fh_unlock on the two filehandles, * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand + * were the same, so again we do it by hand. */ fill_post_wcc(ffhp); fill_post_wcc(tfhp); unlock_rename(tdentry, fdentry); ffhp->fh_locked = tfhp->fh_locked = 0; + fh_drop_write(ffhp); out: return err; @@ -1728,10 +1749,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; - err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = dentry->d_inode; @@ -1750,25 +1775,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!type) type = rdentry->d_inode->i_mode & S_IFMT; - if (type != S_IFDIR) { /* It's UNLINK */ -#ifdef MSNFS - if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { - host_err = -EPERM; - } else -#endif - host_err = vfs_unlink(dirp, rdentry); - } else { /* It's RMDIR */ + if (type != S_IFDIR) + host_err = vfs_unlink(dirp, rdentry, NULL); + else host_err = vfs_rmdir(dirp, rdentry); - } - + if (!host_err) + host_err = commit_metadata(fhp); dput(rdentry); - if (host_err) - goto out_nfserr; - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); - out_nfserr: err = nfserrno(host_err); out: @@ -1776,6 +1790,129 @@ out: } /* + * We do this buffering because we must not call back into the file + * system's ->lookup() method from the filldir callback. That may well + * deadlock a number of file systems. + * + * This is based heavily on the implementation of same in XFS. + */ +struct buffered_dirent { + u64 ino; + loff_t offset; + int namlen; + unsigned int d_type; + char name[]; +}; + +struct readdir_data { + struct dir_context ctx; + char *dirent; + size_t used; + int full; +}; + +static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_data *buf = __buf; + struct buffered_dirent *de = (void *)(buf->dirent + buf->used); + unsigned int reclen; + + reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); + if (buf->used + reclen > PAGE_SIZE) { + buf->full = 1; + return -EINVAL; + } + + de->namlen = namlen; + de->offset = offset; + de->ino = ino; + de->d_type = d_type; + memcpy(de->name, name, namlen); + buf->used += reclen; + + return 0; +} + +static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) +{ + struct buffered_dirent *de; + int host_err; + int size; + loff_t offset; + struct readdir_data buf = { + .ctx.actor = nfsd_buffered_filldir, + .dirent = (void *)__get_free_page(GFP_KERNEL) + }; + + if (!buf.dirent) + return nfserrno(-ENOMEM); + + offset = *offsetp; + + while (1) { + struct inode *dir_inode = file_inode(file); + unsigned int reclen; + + cdp->err = nfserr_eof; /* will be cleared on successful read */ + buf.used = 0; + buf.full = 0; + + host_err = iterate_dir(file, &buf.ctx); + if (buf.full) + host_err = 0; + + if (host_err < 0) + break; + + size = buf.used; + + if (!size) + break; + + /* + * Various filldir functions may end up calling back into + * lookup_one_len() and the file system's ->lookup() method. + * These expect i_mutex to be held, as it would within readdir. + */ + host_err = mutex_lock_killable(&dir_inode->i_mutex); + if (host_err) + break; + + de = (struct buffered_dirent *)buf.dirent; + while (size > 0) { + offset = de->offset; + + if (func(cdp, de->name, de->namlen, de->offset, + de->ino, de->d_type)) + break; + + if (cdp->err != nfs_ok) + break; + + reclen = ALIGN(sizeof(*de) + de->namlen, + sizeof(u64)); + size -= reclen; + de = (struct buffered_dirent *)((char *)de + reclen); + } + mutex_unlock(&dir_inode->i_mutex); + if (size > 0) /* We bailed out early */ + break; + + offset = vfs_llseek(file, 0, SEEK_CUR); + } + + free_page((unsigned long)(buf.dirent)); + + if (host_err) + return nfserrno(host_err); + + *offsetp = offset; + return cdp->err; +} + +/* * Read entries from a directory. * The NFSv3/4 verifier we ignore for now. */ @@ -1784,35 +1921,25 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct readdir_cd *cdp, filldir_t func) { __be32 err; - int host_err; struct file *file; loff_t offset = *offsetp; + int may_flags = NFSD_MAY_READ; + + /* NFSv2 only supports 32 bit cookies */ + if (rqstp->rq_vers > 2) + may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; - offset = vfs_llseek(file, offset, 0); + offset = vfs_llseek(file, offset, SEEK_SET); if (offset < 0) { err = nfserrno((int)offset); goto out_close; } - /* - * Read the directory entries. This silly loop is necessary because - * readdir() is not guaranteed to fill up the entire buffer, but - * may choose to do less. - */ - - do { - cdp->err = nfserr_eof; /* will be cleared on successful read */ - host_err = vfs_readdir(file, func, cdp); - } while (host_err >=0 && cdp->err == nfs_ok); - if (host_err) - err = nfserrno(host_err); - else - err = cdp->err; - *offsetp = vfs_llseek(file, 0, 1); + err = nfsd_buffered_readdir(file, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ @@ -1827,11 +1954,19 @@ out: * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) { - __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); - if (!err && vfs_statfs(fhp->fh_dentry,stat)) - err = nfserr_io; + __be32 err; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); + if (!err) { + struct path path = { + .mnt = fhp->fh_export->ex_path.mnt, + .dentry = fhp->fh_dentry, + }; + if (vfs_statfs(&path, stat)) + err = nfserr_io; + } return err; } @@ -1850,41 +1985,42 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", acc, - (acc & MAY_READ)? " read" : "", - (acc & MAY_WRITE)? " write" : "", - (acc & MAY_EXEC)? " exec" : "", - (acc & MAY_SATTR)? " sattr" : "", - (acc & MAY_TRUNC)? " trunc" : "", - (acc & MAY_LOCK)? " lock" : "", - (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", + (acc & NFSD_MAY_READ)? " read" : "", + (acc & NFSD_MAY_WRITE)? " write" : "", + (acc & NFSD_MAY_EXEC)? " exec" : "", + (acc & NFSD_MAY_SATTR)? " sattr" : "", + (acc & NFSD_MAY_TRUNC)? " trunc" : "", + (acc & NFSD_MAY_LOCK)? " lock" : "", + (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "", inode->i_mode, IS_IMMUTABLE(inode)? " immut" : "", IS_APPEND(inode)? " append" : "", - IS_RDONLY(inode)? " ro" : ""); + __mnt_is_readonly(exp->ex_path.mnt)? " ro" : ""); dprintk(" owner %d/%d user %d/%d\n", - inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); + inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid()); #endif /* Normally we reject any write/sattr etc access on a read-only file * system. But if it is IRIX doing check on write-access for a * device special file, we ignore rofs. */ - if (!(acc & MAY_LOCAL_ACCESS)) - if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) + if (!(acc & NFSD_MAY_LOCAL_ACCESS)) + if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { + if (exp_rdonly(rqstp, exp) || + __mnt_is_readonly(exp->ex_path.mnt)) return nfserr_rofs; - if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) return nfserr_perm; } - if ((acc & MAY_TRUNC) && IS_APPEND(inode)) + if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) return nfserr_perm; - if (acc & MAY_LOCK) { + if (acc & NFSD_MAY_LOCK) { /* If we cannot rely on authentication in NLM requests, * just allow locks, otherwise require read permission, or * ownership @@ -1892,7 +2028,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, if (exp->ex_flags & NFSEXP_NOAUTHNLM) return 0; else - acc = MAY_READ | MAY_OWNER_OVERRIDE; + acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; } /* * The file owner always gets access permission for accesses that @@ -1908,16 +2044,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, * We must trust the client to do permission checking - using "ACCESS" * with NFSv3. */ - if ((acc & MAY_OWNER_OVERRIDE) && - inode->i_uid == current->fsuid) + if ((acc & NFSD_MAY_OWNER_OVERRIDE) && + uid_eq(inode->i_uid, current_fsuid())) return 0; - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); + /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ + err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && - acc == (MAY_READ | MAY_OWNER_OVERRIDE)) - err = permission(inode, MAY_EXEC, NULL); + (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) || + acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC))) + err = inode_permission(inode, MAY_EXEC); return err? nfserrno(err) : 0; } @@ -1925,11 +2063,20 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, void nfsd_racache_shutdown(void) { - if (!raparml) - return; + struct raparms *raparm, *last_raparm; + unsigned int i; + dprintk("nfsd: freeing readahead buffers.\n"); - kfree(raparml); - raparml = NULL; + + for (i = 0; i < RAPARM_HASH_SIZE; i++) { + raparm = raparm_hash[i].pb_head; + while(raparm) { + last_raparm = raparm; + raparm = raparm->p_next; + kfree(last_raparm); + } + raparm_hash[i].pb_head = NULL; + } } /* * Initialize readahead param cache @@ -1940,119 +2087,36 @@ nfsd_racache_init(int cache_size) int i; int j = 0; int nperbucket; + struct raparms **raparm = NULL; - if (raparml) + if (raparm_hash[0].pb_head) return 0; - if (cache_size < 2*RAPARM_HASH_SIZE) - cache_size = 2*RAPARM_HASH_SIZE; - raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); - - if (!raparml) { - printk(KERN_WARNING - "nfsd: Could not allocate memory read-ahead cache.\n"); - return -ENOMEM; - } + nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); + if (nperbucket < 2) + nperbucket = 2; + cache_size = nperbucket * RAPARM_HASH_SIZE; dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); - for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { - raparm_hash[i].pb_head = NULL; + + for (i = 0; i < RAPARM_HASH_SIZE; i++) { spin_lock_init(&raparm_hash[i].pb_lock); - } - nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - for (i = 0; i < cache_size - 1; i++) { - if (i % nperbucket == 0) - raparm_hash[j++].pb_head = raparml + i; - if (i % nperbucket < nperbucket-1) - raparml[i].p_next = raparml + i + 1; + + raparm = &raparm_hash[i].pb_head; + for (j = 0; j < nperbucket; j++) { + *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); + if (!*raparm) + goto out_nomem; + raparm = &(*raparm)->p_next; + } + *raparm = NULL; } nfsdstats.ra_size = cache_size; return 0; -} - -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -struct posix_acl * -nfsd_get_posix_acl(struct svc_fh *fhp, int type) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - ssize_t size; - struct posix_acl *acl; - - if (!IS_POSIXACL(inode)) - return ERR_PTR(-EOPNOTSUPP); - - switch (type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return ERR_PTR(-EOPNOTSUPP); - } - size = nfsd_getxattr(fhp->fh_dentry, name, &value); - if (size < 0) - return ERR_PTR(size); - - acl = posix_acl_from_xattr(value, size); - kfree(value); - return acl; -} - -int -nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - char *name; - void *value = NULL; - size_t size; - int error; - - if (!IS_POSIXACL(inode) || !inode->i_op || - !inode->i_op->setxattr || !inode->i_op->removexattr) - return -EOPNOTSUPP; - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - break; - default: - return -EOPNOTSUPP; - } - - if (acl && acl->a_count) { - size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(size, GFP_KERNEL); - if (!value) - return -ENOMEM; - error = posix_acl_to_xattr(acl, value, size); - if (error < 0) - goto getout; - size = error; - } else - size = 0; - - if (size) - error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); - else { - if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) - error = 0; - else { - error = vfs_removexattr(fhp->fh_dentry, name); - if (error == -ENODATA) - error = 0; - } - } - -getout: - kfree(value); - return error; +out_nomem: + dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); + nfsd_racache_shutdown(); + return -ENOMEM; } -#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h new file mode 100644 index 00000000000..91b6ae3f658 --- /dev/null +++ b/fs/nfsd/vfs.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef LINUX_NFSD_VFS_H +#define LINUX_NFSD_VFS_H + +#include "nfsfh.h" +#include "nfsd.h" + +/* + * Flags for nfsd_permission + */ +#define NFSD_MAY_NOP 0 +#define NFSD_MAY_EXEC 0x001 /* == MAY_EXEC */ +#define NFSD_MAY_WRITE 0x002 /* == MAY_WRITE */ +#define NFSD_MAY_READ 0x004 /* == MAY_READ */ +#define NFSD_MAY_SATTR 0x008 +#define NFSD_MAY_TRUNC 0x010 +#define NFSD_MAY_LOCK 0x020 +#define NFSD_MAY_MASK 0x03f + +/* extra hints to permission and open routines: */ +#define NFSD_MAY_OWNER_OVERRIDE 0x040 +#define NFSD_MAY_LOCAL_ACCESS 0x080 /* for device special files */ +#define NFSD_MAY_BYPASS_GSS_ON_ROOT 0x100 +#define NFSD_MAY_NOT_BREAK_LEASE 0x200 +#define NFSD_MAY_BYPASS_GSS 0x400 +#define NFSD_MAY_READ_IF_EXEC 0x800 + +#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ + +#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) +#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) + +/* + * Callback function for readdir + */ +typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); + +/* nfsd/vfs.c */ +int nfsd_racache_init(int); +void nfsd_racache_shutdown(void); +int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, + struct svc_export **expp); +__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, + const char *, unsigned int, struct svc_fh *); +__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, + const char *, unsigned int, + struct svc_export **, struct dentry **); +__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, + struct iattr *, int, time_t); +int nfsd_mountpoint(struct dentry *, struct svc_export *); +#ifdef CONFIG_NFSD_V4 +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); +#endif /* CONFIG_NFSD_V4 */ +__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + int type, dev_t rdev, struct svc_fh *res); +#ifdef CONFIG_NFSD_V3 +__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); +__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + struct svc_fh *res, int createmode, + u32 *verifier, bool *truncp, bool *created); +__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, + loff_t, unsigned long); +#endif /* CONFIG_NFSD_V3 */ +__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, + int, struct file **); +void nfsd_close(struct file *); +struct raparms; +__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, + struct file **, struct raparms **); +void nfsd_put_tmp_read_open(struct file *, struct raparms *); +int nfsd_splice_read(struct svc_rqst *, + struct file *, loff_t, unsigned long *); +int nfsd_readv(struct file *, loff_t, struct kvec *, int, + unsigned long *); +__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, + loff_t, struct kvec *, int, unsigned long *); +__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, + loff_t, struct kvec *,int, unsigned long *, int *); +__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, + char *, int *); +__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, + char *name, int len, char *path, int plen, + struct svc_fh *res, struct iattr *); +__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, + char *, int, struct svc_fh *); +__be32 nfsd_rename(struct svc_rqst *, + struct svc_fh *, char *, int, + struct svc_fh *, char *, int); +__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, + char *name, int len); +__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, + loff_t *, struct readdir_cd *, filldir_t); +__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, + struct kstatfs *, int access); + +__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, + struct dentry *, int); + +static inline int fh_want_write(struct svc_fh *fh) +{ + int ret = mnt_want_write(fh->fh_export->ex_path.mnt); + + if (!ret) + fh->fh_want_write = 1; + return ret; +} + +static inline void fh_drop_write(struct svc_fh *fh) +{ + if (fh->fh_want_write) { + fh->fh_want_write = 0; + mnt_drop_write(fh->fh_export->ex_path.mnt); + } +} + +static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +{ + struct path p = {.mnt = fh->fh_export->ex_path.mnt, + .dentry = fh->fh_dentry}; + return nfserrno(vfs_getattr(&p, stat)); +} + +#endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h new file mode 100644 index 00000000000..4f0481d6380 --- /dev/null +++ b/fs/nfsd/xdr.h @@ -0,0 +1,173 @@ +/* XDR types for nfsd. This is mainly a typing exercise. */ + +#ifndef LINUX_NFSD_H +#define LINUX_NFSD_H + +#include <linux/vfs.h> +#include "nfsd.h" +#include "nfsfh.h" + +struct nfsd_fhandle { + struct svc_fh fh; +}; + +struct nfsd_sattrargs { + struct svc_fh fh; + struct iattr attrs; +}; + +struct nfsd_diropargs { + struct svc_fh fh; + char * name; + unsigned int len; +}; + +struct nfsd_readargs { + struct svc_fh fh; + __u32 offset; + __u32 count; + int vlen; +}; + +struct nfsd_writeargs { + svc_fh fh; + __u32 offset; + int len; + int vlen; +}; + +struct nfsd_createargs { + struct svc_fh fh; + char * name; + unsigned int len; + struct iattr attrs; +}; + +struct nfsd_renameargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd_readlinkargs { + struct svc_fh fh; + char * buffer; +}; + +struct nfsd_linkargs { + struct svc_fh ffh; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd_symlinkargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + char * tname; + unsigned int tlen; + struct iattr attrs; +}; + +struct nfsd_readdirargs { + struct svc_fh fh; + __u32 cookie; + __u32 count; + __be32 * buffer; +}; + +struct nfsd_attrstat { + struct svc_fh fh; + struct kstat stat; +}; + +struct nfsd_diropres { + struct svc_fh fh; + struct kstat stat; +}; + +struct nfsd_readlinkres { + int len; +}; + +struct nfsd_readres { + struct svc_fh fh; + unsigned long count; + struct kstat stat; +}; + +struct nfsd_readdirres { + int count; + + struct readdir_cd common; + __be32 * buffer; + int buflen; + __be32 * offset; +}; + +struct nfsd_statfsres { + struct kstatfs stats; +}; + +/* + * Storage requirements for XDR arguments and results. + */ +union nfsd_xdrstore { + struct nfsd_sattrargs sattr; + struct nfsd_diropargs dirop; + struct nfsd_readargs read; + struct nfsd_writeargs write; + struct nfsd_createargs create; + struct nfsd_renameargs rename; + struct nfsd_linkargs link; + struct nfsd_symlinkargs symlink; + struct nfsd_readdirargs readdir; +}; + +#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) + + +int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *); +int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *, + struct nfsd_sattrargs *); +int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *, + struct nfsd_diropargs *); +int nfssvc_decode_readargs(struct svc_rqst *, __be32 *, + struct nfsd_readargs *); +int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *, + struct nfsd_writeargs *); +int nfssvc_decode_createargs(struct svc_rqst *, __be32 *, + struct nfsd_createargs *); +int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *, + struct nfsd_renameargs *); +int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *, + struct nfsd_readlinkargs *); +int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *, + struct nfsd_linkargs *); +int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *, + struct nfsd_symlinkargs *); +int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *, + struct nfsd_readdirargs *); +int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *); +int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *); +int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *); +int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *); +int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *); +int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *); +int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *); + +int nfssvc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int); + +int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); + +/* Helper functions for NFSv2 ACL code */ +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); + +#endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h new file mode 100644 index 00000000000..335e04aaf7d --- /dev/null +++ b/fs/nfsd/xdr3.h @@ -0,0 +1,349 @@ +/* + * XDR types for NFSv3 in nfsd. + * + * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de> + */ + +#ifndef _LINUX_NFSD_XDR3_H +#define _LINUX_NFSD_XDR3_H + +#include "xdr.h" + +struct nfsd3_sattrargs { + struct svc_fh fh; + struct iattr attrs; + int check_guard; + time_t guardtime; +}; + +struct nfsd3_diropargs { + struct svc_fh fh; + char * name; + unsigned int len; +}; + +struct nfsd3_accessargs { + struct svc_fh fh; + unsigned int access; +}; + +struct nfsd3_readargs { + struct svc_fh fh; + __u64 offset; + __u32 count; + int vlen; +}; + +struct nfsd3_writeargs { + svc_fh fh; + __u64 offset; + __u32 count; + int stable; + __u32 len; + int vlen; +}; + +struct nfsd3_createargs { + struct svc_fh fh; + char * name; + unsigned int len; + int createmode; + struct iattr attrs; + __be32 * verf; +}; + +struct nfsd3_mknodargs { + struct svc_fh fh; + char * name; + unsigned int len; + __u32 ftype; + __u32 major, minor; + struct iattr attrs; +}; + +struct nfsd3_renameargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd3_readlinkargs { + struct svc_fh fh; + char * buffer; +}; + +struct nfsd3_linkargs { + struct svc_fh ffh; + struct svc_fh tfh; + char * tname; + unsigned int tlen; +}; + +struct nfsd3_symlinkargs { + struct svc_fh ffh; + char * fname; + unsigned int flen; + char * tname; + unsigned int tlen; + struct iattr attrs; +}; + +struct nfsd3_readdirargs { + struct svc_fh fh; + __u64 cookie; + __u32 dircount; + __u32 count; + __be32 * verf; + __be32 * buffer; +}; + +struct nfsd3_commitargs { + struct svc_fh fh; + __u64 offset; + __u32 count; +}; + +struct nfsd3_getaclargs { + struct svc_fh fh; + int mask; +}; + +struct posix_acl; +struct nfsd3_setaclargs { + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + +struct nfsd3_attrstat { + __be32 status; + struct svc_fh fh; + struct kstat stat; +}; + +/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */ +struct nfsd3_diropres { + __be32 status; + struct svc_fh dirfh; + struct svc_fh fh; +}; + +struct nfsd3_accessres { + __be32 status; + struct svc_fh fh; + __u32 access; + struct kstat stat; +}; + +struct nfsd3_readlinkres { + __be32 status; + struct svc_fh fh; + __u32 len; +}; + +struct nfsd3_readres { + __be32 status; + struct svc_fh fh; + unsigned long count; + int eof; +}; + +struct nfsd3_writeres { + __be32 status; + struct svc_fh fh; + unsigned long count; + int committed; +}; + +struct nfsd3_renameres { + __be32 status; + struct svc_fh ffh; + struct svc_fh tfh; +}; + +struct nfsd3_linkres { + __be32 status; + struct svc_fh tfh; + struct svc_fh fh; +}; + +struct nfsd3_readdirres { + __be32 status; + struct svc_fh fh; + /* Just to save kmalloc on every readdirplus entry (svc_fh is a + * little large for the stack): */ + struct svc_fh scratch; + int count; + __be32 verf[2]; + + struct readdir_cd common; + __be32 * buffer; + int buflen; + __be32 * offset; + __be32 * offset1; + struct svc_rqst * rqstp; + +}; + +struct nfsd3_fsstatres { + __be32 status; + struct kstatfs stats; + __u32 invarsec; +}; + +struct nfsd3_fsinfores { + __be32 status; + __u32 f_rtmax; + __u32 f_rtpref; + __u32 f_rtmult; + __u32 f_wtmax; + __u32 f_wtpref; + __u32 f_wtmult; + __u32 f_dtpref; + __u64 f_maxfilesize; + __u32 f_properties; +}; + +struct nfsd3_pathconfres { + __be32 status; + __u32 p_link_max; + __u32 p_name_max; + __u32 p_no_trunc; + __u32 p_chown_restricted; + __u32 p_case_insensitive; + __u32 p_case_preserving; +}; + +struct nfsd3_commitres { + __be32 status; + struct svc_fh fh; +}; + +struct nfsd3_getaclres { + __be32 status; + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; + struct kstat stat; +}; + +/* dummy type for release */ +struct nfsd3_fhandle_pair { + __u32 dummy; + struct svc_fh fh1; + struct svc_fh fh2; +}; + +/* + * Storage requirements for XDR arguments and results. + */ +union nfsd3_xdrstore { + struct nfsd3_sattrargs sattrargs; + struct nfsd3_diropargs diropargs; + struct nfsd3_readargs readargs; + struct nfsd3_writeargs writeargs; + struct nfsd3_createargs createargs; + struct nfsd3_renameargs renameargs; + struct nfsd3_linkargs linkargs; + struct nfsd3_symlinkargs symlinkargs; + struct nfsd3_readdirargs readdirargs; + struct nfsd3_diropres diropres; + struct nfsd3_accessres accessres; + struct nfsd3_readlinkres readlinkres; + struct nfsd3_readres readres; + struct nfsd3_writeres writeres; + struct nfsd3_renameres renameres; + struct nfsd3_linkres linkres; + struct nfsd3_readdirres readdirres; + struct nfsd3_fsstatres fsstatres; + struct nfsd3_fsinfores fsinfores; + struct nfsd3_pathconfres pathconfres; + struct nfsd3_commitres commitres; + struct nfsd3_getaclres getaclres; +}; + +#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) + +int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *, + struct nfsd3_sattrargs *); +int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *, + struct nfsd3_diropargs *); +int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *, + struct nfsd3_accessargs *); +int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *, + struct nfsd3_readargs *); +int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *, + struct nfsd3_writeargs *); +int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *, + struct nfsd3_createargs *); +int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *, + struct nfsd3_createargs *); +int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *, + struct nfsd3_mknodargs *); +int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *, + struct nfsd3_renameargs *); +int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *, + struct nfsd3_readlinkargs *); +int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *, + struct nfsd3_linkargs *); +int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *, + struct nfsd3_symlinkargs *); +int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *, + struct nfsd3_readdirargs *); +int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *, + struct nfsd3_readdirargs *); +int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *, + struct nfsd3_commitargs *); +int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *, + struct nfsd3_diropres *); +int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *, + struct nfsd3_accessres *); +int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *, + struct nfsd3_readlinkres *); +int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *); +int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *); +int nfs3svc_encode_createres(struct svc_rqst *, __be32 *, + struct nfsd3_diropres *); +int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *, + struct nfsd3_renameres *); +int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *, + struct nfsd3_linkres *); +int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *, + struct nfsd3_readdirres *); +int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *, + struct nfsd3_fsstatres *); +int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *, + struct nfsd3_fsinfores *); +int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *, + struct nfsd3_pathconfres *); +int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *, + struct nfsd3_commitres *); + +int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *, + struct nfsd3_attrstat *); +int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *, + struct nfsd3_fhandle_pair *); +int nfs3svc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); +int nfs3svc_encode_entry_plus(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); +/* Helper functions for NFSv3 ACL code */ +__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, + struct svc_fh *fhp); +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); + + +#endif /* _LINUX_NFSD_XDR3_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h new file mode 100644 index 00000000000..18cbb6d9c8a --- /dev/null +++ b/fs/nfsd/xdr4.h @@ -0,0 +1,634 @@ +/* + * Server-side types for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _LINUX_NFSD_XDR4_H +#define _LINUX_NFSD_XDR4_H + +#include "state.h" +#include "nfsd.h" + +#define NFSD4_MAX_SEC_LABEL_LEN 2048 +#define NFSD4_MAX_TAGLEN 128 +#define XDR_LEN(n) (((n) + 3) & ~3) + +#define CURRENT_STATE_ID_FLAG (1<<0) +#define SAVED_STATE_ID_FLAG (1<<1) + +#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) +#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) +#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) + +struct nfsd4_compound_state { + struct svc_fh current_fh; + struct svc_fh save_fh; + struct nfs4_stateowner *replay_owner; + /* For sessions DRC */ + struct nfsd4_session *session; + struct nfsd4_slot *slot; + int data_offset; + size_t iovlen; + u32 minorversion; + __be32 status; + stateid_t current_stateid; + stateid_t save_stateid; + /* to indicate current and saved state id presents */ + u32 sid_flags; +}; + +static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) +{ + return cs->slot != NULL; +} + +struct nfsd4_change_info { + u32 atomic; + bool change_supported; + u32 before_ctime_sec; + u32 before_ctime_nsec; + u64 before_change; + u32 after_ctime_sec; + u32 after_ctime_nsec; + u64 after_change; +}; + +struct nfsd4_access { + u32 ac_req_access; /* request */ + u32 ac_supported; /* response */ + u32 ac_resp_access; /* response */ +}; + +struct nfsd4_close { + u32 cl_seqid; /* request */ + stateid_t cl_stateid; /* request+response */ +}; + +struct nfsd4_commit { + u64 co_offset; /* request */ + u32 co_count; /* request */ + nfs4_verifier co_verf; /* response */ +}; + +struct nfsd4_create { + u32 cr_namelen; /* request */ + char * cr_name; /* request */ + u32 cr_type; /* request */ + union { /* request */ + struct { + u32 namelen; + char *name; + } link; /* NF4LNK */ + struct { + u32 specdata1; + u32 specdata2; + } dev; /* NF4BLK, NF4CHR */ + } u; + u32 cr_bmval[3]; /* request */ + struct iattr cr_iattr; /* request */ + struct nfsd4_change_info cr_cinfo; /* response */ + struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; +}; +#define cr_linklen u.link.namelen +#define cr_linkname u.link.name +#define cr_specdata1 u.dev.specdata1 +#define cr_specdata2 u.dev.specdata2 + +struct nfsd4_delegreturn { + stateid_t dr_stateid; +}; + +struct nfsd4_getattr { + u32 ga_bmval[3]; /* request */ + struct svc_fh *ga_fhp; /* response */ +}; + +struct nfsd4_link { + u32 li_namelen; /* request */ + char * li_name; /* request */ + struct nfsd4_change_info li_cinfo; /* response */ +}; + +struct nfsd4_lock_denied { + clientid_t ld_clientid; + struct xdr_netobj ld_owner; + u64 ld_start; + u64 ld_length; + u32 ld_type; +}; + +struct nfsd4_lock { + /* request */ + u32 lk_type; + u32 lk_reclaim; /* boolean */ + u64 lk_offset; + u64 lk_length; + u32 lk_is_new; + union { + struct { + u32 open_seqid; + stateid_t open_stateid; + u32 lock_seqid; + clientid_t clientid; + struct xdr_netobj owner; + } new; + struct { + stateid_t lock_stateid; + u32 lock_seqid; + } old; + } v; + + /* response */ + union { + struct { + stateid_t stateid; + } ok; + struct nfsd4_lock_denied denied; + } u; +}; +#define lk_new_open_seqid v.new.open_seqid +#define lk_new_open_stateid v.new.open_stateid +#define lk_new_lock_seqid v.new.lock_seqid +#define lk_new_clientid v.new.clientid +#define lk_new_owner v.new.owner +#define lk_old_lock_stateid v.old.lock_stateid +#define lk_old_lock_seqid v.old.lock_seqid + +#define lk_resp_stateid u.ok.stateid +#define lk_denied u.denied + + +struct nfsd4_lockt { + u32 lt_type; + clientid_t lt_clientid; + struct xdr_netobj lt_owner; + u64 lt_offset; + u64 lt_length; + struct nfsd4_lock_denied lt_denied; +}; + + +struct nfsd4_locku { + u32 lu_type; + u32 lu_seqid; + stateid_t lu_stateid; + u64 lu_offset; + u64 lu_length; +}; + + +struct nfsd4_lookup { + u32 lo_len; /* request */ + char * lo_name; /* request */ +}; + +struct nfsd4_putfh { + u32 pf_fhlen; /* request */ + char *pf_fhval; /* request */ +}; + +struct nfsd4_open { + u32 op_claim_type; /* request */ + struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ + u32 op_delegate_type; /* request - CLAIM_PREV only */ + stateid_t op_delegate_stateid; /* request - response */ + u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ + u32 op_create; /* request */ + u32 op_createmode; /* request */ + u32 op_bmval[3]; /* request */ + struct iattr op_iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ + nfs4_verifier op_verf __attribute__((aligned(32))); + /* EXCLUSIVE4 */ + clientid_t op_clientid; /* request */ + struct xdr_netobj op_owner; /* request */ + u32 op_seqid; /* request */ + u32 op_share_access; /* request */ + u32 op_share_deny; /* request */ + u32 op_deleg_want; /* request */ + stateid_t op_stateid; /* response */ + __be32 op_xdr_error; /* see nfsd4_open_omfg() */ + u32 op_recall; /* recall */ + struct nfsd4_change_info op_cinfo; /* response */ + u32 op_rflags; /* response */ + bool op_truncate; /* used during processing */ + bool op_created; /* used during processing */ + struct nfs4_openowner *op_openowner; /* used during processing */ + struct nfs4_file *op_file; /* used during processing */ + struct nfs4_ol_stateid *op_stp; /* used during processing */ + struct nfs4_acl *op_acl; + struct xdr_netobj op_label; +}; + +struct nfsd4_open_confirm { + stateid_t oc_req_stateid /* request */; + u32 oc_seqid /* request */; + stateid_t oc_resp_stateid /* response */; +}; + +struct nfsd4_open_downgrade { + stateid_t od_stateid; + u32 od_seqid; + u32 od_share_access; /* request */ + u32 od_deleg_want; /* request */ + u32 od_share_deny; /* request */ +}; + + +struct nfsd4_read { + stateid_t rd_stateid; /* request */ + u64 rd_offset; /* request */ + u32 rd_length; /* request */ + int rd_vlen; + struct file *rd_filp; + + struct svc_rqst *rd_rqstp; /* response */ + struct svc_fh * rd_fhp; /* response */ +}; + +struct nfsd4_readdir { + u64 rd_cookie; /* request */ + nfs4_verifier rd_verf; /* request */ + u32 rd_dircount; /* request */ + u32 rd_maxcount; /* request */ + u32 rd_bmval[3]; /* request */ + struct svc_rqst *rd_rqstp; /* response */ + struct svc_fh * rd_fhp; /* response */ + + struct readdir_cd common; + struct xdr_stream *xdr; + int cookie_offset; +}; + +struct nfsd4_release_lockowner { + clientid_t rl_clientid; + struct xdr_netobj rl_owner; +}; +struct nfsd4_readlink { + struct svc_rqst *rl_rqstp; /* request */ + struct svc_fh * rl_fhp; /* request */ +}; + +struct nfsd4_remove { + u32 rm_namelen; /* request */ + char * rm_name; /* request */ + struct nfsd4_change_info rm_cinfo; /* response */ +}; + +struct nfsd4_rename { + u32 rn_snamelen; /* request */ + char * rn_sname; /* request */ + u32 rn_tnamelen; /* request */ + char * rn_tname; /* request */ + struct nfsd4_change_info rn_sinfo; /* response */ + struct nfsd4_change_info rn_tinfo; /* response */ +}; + +struct nfsd4_secinfo { + u32 si_namelen; /* request */ + char *si_name; /* request */ + struct svc_export *si_exp; /* response */ +}; + +struct nfsd4_secinfo_no_name { + u32 sin_style; /* request */ + struct svc_export *sin_exp; /* response */ +}; + +struct nfsd4_setattr { + stateid_t sa_stateid; /* request */ + u32 sa_bmval[3]; /* request */ + struct iattr sa_iattr; /* request */ + struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; +}; + +struct nfsd4_setclientid { + nfs4_verifier se_verf; /* request */ + struct xdr_netobj se_name; + u32 se_callback_prog; /* request */ + u32 se_callback_netid_len; /* request */ + char * se_callback_netid_val; /* request */ + u32 se_callback_addr_len; /* request */ + char * se_callback_addr_val; /* request */ + u32 se_callback_ident; /* request */ + clientid_t se_clientid; /* response */ + nfs4_verifier se_confirm; /* response */ +}; + +struct nfsd4_setclientid_confirm { + clientid_t sc_clientid; + nfs4_verifier sc_confirm; +}; + +struct nfsd4_saved_compoundargs { + __be32 *p; + __be32 *end; + int pagelen; + struct page **pagelist; +}; + +struct nfsd4_test_stateid_id { + __be32 ts_id_status; + stateid_t ts_id_stateid; + struct list_head ts_id_list; +}; + +struct nfsd4_test_stateid { + u32 ts_num_ids; + struct list_head ts_stateid_list; +}; + +struct nfsd4_free_stateid { + stateid_t fr_stateid; /* request */ +}; + +/* also used for NVERIFY */ +struct nfsd4_verify { + u32 ve_bmval[3]; /* request */ + u32 ve_attrlen; /* request */ + char * ve_attrval; /* request */ +}; + +struct nfsd4_write { + stateid_t wr_stateid; /* request */ + u64 wr_offset; /* request */ + u32 wr_stable_how; /* request */ + u32 wr_buflen; /* request */ + struct kvec wr_head; + struct page ** wr_pagelist; /* request */ + + u32 wr_bytes_written; /* response */ + u32 wr_how_written; /* response */ + nfs4_verifier wr_verifier; /* response */ +}; + +struct nfsd4_exchange_id { + nfs4_verifier verifier; + struct xdr_netobj clname; + u32 flags; + clientid_t clientid; + u32 seqid; + int spa_how; +}; + +struct nfsd4_sequence { + struct nfs4_sessionid sessionid; /* request/response */ + u32 seqid; /* request/response */ + u32 slotid; /* request/response */ + u32 maxslots; /* request/response */ + u32 cachethis; /* request */ +#if 0 + u32 target_maxslots; /* response */ +#endif /* not yet */ + u32 status_flags; /* response */ +}; + +struct nfsd4_destroy_session { + struct nfs4_sessionid sessionid; +}; + +struct nfsd4_destroy_clientid { + clientid_t clientid; +}; + +struct nfsd4_reclaim_complete { + u32 rca_one_fs; +}; + +struct nfsd4_op { + int opnum; + __be32 status; + union { + struct nfsd4_access access; + struct nfsd4_close close; + struct nfsd4_commit commit; + struct nfsd4_create create; + struct nfsd4_delegreturn delegreturn; + struct nfsd4_getattr getattr; + struct svc_fh * getfh; + struct nfsd4_link link; + struct nfsd4_lock lock; + struct nfsd4_lockt lockt; + struct nfsd4_locku locku; + struct nfsd4_lookup lookup; + struct nfsd4_verify nverify; + struct nfsd4_open open; + struct nfsd4_open_confirm open_confirm; + struct nfsd4_open_downgrade open_downgrade; + struct nfsd4_putfh putfh; + struct nfsd4_read read; + struct nfsd4_readdir readdir; + struct nfsd4_readlink readlink; + struct nfsd4_remove remove; + struct nfsd4_rename rename; + clientid_t renew; + struct nfsd4_secinfo secinfo; + struct nfsd4_setattr setattr; + struct nfsd4_setclientid setclientid; + struct nfsd4_setclientid_confirm setclientid_confirm; + struct nfsd4_verify verify; + struct nfsd4_write write; + struct nfsd4_release_lockowner release_lockowner; + + /* NFSv4.1 */ + struct nfsd4_exchange_id exchange_id; + struct nfsd4_backchannel_ctl backchannel_ctl; + struct nfsd4_bind_conn_to_session bind_conn_to_session; + struct nfsd4_create_session create_session; + struct nfsd4_destroy_session destroy_session; + struct nfsd4_sequence sequence; + struct nfsd4_reclaim_complete reclaim_complete; + struct nfsd4_test_stateid test_stateid; + struct nfsd4_free_stateid free_stateid; + } u; + struct nfs4_replay * replay; +}; + +bool nfsd4_cache_this_op(struct nfsd4_op *); + +struct nfsd4_compoundargs { + /* scratch variables for XDR decode */ + __be32 * p; + __be32 * end; + struct page ** pagelist; + int pagelen; + __be32 tmp[8]; + __be32 * tmpp; + struct tmpbuf { + struct tmpbuf *next; + void (*release)(const void *); + void *buf; + } *to_free; + + struct svc_rqst *rqstp; + + u32 taglen; + char * tag; + u32 minorversion; + u32 opcnt; + struct nfsd4_op *ops; + struct nfsd4_op iops[8]; + int cachetype; +}; + +struct nfsd4_compoundres { + /* scratch variables for XDR encode */ + struct xdr_stream xdr; + struct svc_rqst * rqstp; + + u32 taglen; + char * tag; + u32 opcnt; + __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; +}; + +static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; +} + +static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +{ + return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + || nfsd4_is_solo_sequence(resp); +} + +static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + return argp->opcnt == resp->opcnt; +} + +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); +void warn_on_nonidempotent_op(struct nfsd4_op *op); + +#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) + +static inline void +set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) +{ + BUG_ON(!fhp->fh_pre_saved); + cinfo->atomic = fhp->fh_post_saved; + cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); + + cinfo->before_change = fhp->fh_pre_change; + cinfo->after_change = fhp->fh_post_change; + cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; + cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; + cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; + cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; + +} + +int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, + struct nfsd4_compoundargs *); +int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, + struct nfsd4_compoundres *); +__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); +void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); +void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); +__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, + struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, + u32 *bmval, struct svc_rqst *, int ignore_crossmnt); +extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_setclientid *setclid); +extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_setclientid_confirm *setclientid_confirm); +extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); +extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_exchange_id *); +extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); +extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); +extern __be32 nfsd4_create_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_create_session *); +extern __be32 nfsd4_sequence(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_sequence *); +extern __be32 nfsd4_destroy_session(struct svc_rqst *, + struct nfsd4_compound_state *, + struct nfsd4_destroy_session *); +extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); +__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); +extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, + struct nfsd4_open *open, struct nfsd_net *nn); +extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, + struct svc_fh *current_fh, struct nfsd4_open *open); +extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); +extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); +extern __be32 nfsd4_close(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_close *close); +extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_open_downgrade *od); +extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *, + struct nfsd4_lock *lock); +extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_lockt *lockt); +extern __be32 nfsd4_locku(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_locku *locku); +extern __be32 +nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, + struct nfsd4_release_lockowner *rlockowner); +extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp); +extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr); +extern __be32 nfsd4_renew(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, clientid_t *clid); +extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); +extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); +extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); +#endif + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h new file mode 100644 index 00000000000..c5c55dfb91a --- /dev/null +++ b/fs/nfsd/xdr4cb.h @@ -0,0 +1,23 @@ +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define cb_sequence_enc_sz (sessionid_sz + 4 + \ + 1 /* no referring calls list yet */) +#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) + +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz (NFS4_STATEID_SIZE >> 2) +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) |
