diff options
Diffstat (limited to 'fs/nfs/inode.c')
| -rw-r--r-- | fs/nfs/inode.c | 2867 |
1 files changed, 1308 insertions, 1559 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 24d2fbf549b..9927913c97c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -5,7 +5,7 @@ * * nfs inode and superblock handling functions * - * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. @@ -13,10 +13,9 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> - +#include <linux/sched.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -26,102 +25,45 @@ #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/metrics.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> #include <linux/lockd/bind.h> -#include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> -#include <linux/nfs_idmap.h> #include <linux/vfs.h> +#include <linux/inet.h> +#include <linux/nfs_xdr.h> +#include <linux/slab.h> +#include <linux/compat.h> +#include <linux/freezer.h> -#include <asm/system.h> #include <asm/uaccess.h> #include "nfs4_fs.h" +#include "callback.h" #include "delegation.h" +#include "iostat.h" +#include "internal.h" +#include "fscache.h" +#include "pnfs.h" +#include "nfs.h" +#include "netns.h" -#define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_PARANOIA 1 - -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. - */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) +#include "nfstrace.h" -static void nfs_invalidate_inode(struct inode *); -static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); - -static struct inode *nfs_alloc_inode(struct super_block *sb); -static void nfs_destroy_inode(struct inode *); -static int nfs_write_inode(struct inode *,int); -static void nfs_delete_inode(struct inode *); -static void nfs_clear_inode(struct inode *); -static void nfs_umount_begin(struct super_block *); -static int nfs_statfs(struct super_block *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct vfsmount *); -static void nfs_zap_acl_cache(struct inode *); - -static struct rpc_program nfs_program; - -static struct super_operations nfs_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, - .statfs = nfs_statfs, - .clear_inode = nfs_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, -}; +#define NFSDBG_FACILITY NFSDBG_VFS -/* - * RPC cruft for NFS - */ -static struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; +#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; +/* Default is to see 64-bit inode numbers */ +static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; +static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *); -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ +static struct kmem_cache * nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) @@ -129,541 +71,138 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -static int -nfs_write_inode(struct inode *inode, int sync) +/** + * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks + * @word: long word containing the bit lock + */ +int nfs_wait_bit_killable(void *word) { - int flags = sync ? FLUSH_WAIT : 0; - int ret; - - ret = nfs_commit_inode(inode, flags); - if (ret < 0) - return ret; + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); return 0; } +EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); -static void -nfs_delete_inode(struct inode * inode) -{ - dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); - - truncate_inode_pages(&inode->i_data, 0); - - nfs_wb_all(inode); - /* - * The following should never happen... - */ - if (nfs_have_writebacks(inode)) { - printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); - } - - clear_inode(inode); -} - -static void -nfs_clear_inode(struct inode *inode) +/** + * nfs_compat_user_ino64 - returns the user-visible inode number + * @fileid: 64-bit fileid + * + * This function returns a 32-bit inode number if the boot parameter + * nfs.enable_ino64 is zero. + */ +u64 nfs_compat_user_ino64(u64 fileid) { - struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred; +#ifdef CONFIG_COMPAT + compat_ulong_t ino; +#else + unsigned long ino; +#endif - nfs_wb_all(inode); - BUG_ON (!list_empty(&nfsi->open_files)); - nfs_zap_acl_cache(inode); - cred = nfsi->cache_access.cred; - if (cred) - put_rpccred(cred); - BUG_ON(atomic_read(&nfsi->data_updates) != 0); + if (enable_ino64) + return fileid; + ino = fileid; + if (sizeof(ino) < sizeof(fileid)) + ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; + return ino; } -void -nfs_umount_begin(struct super_block *sb) +int nfs_drop_inode(struct inode *inode) { - struct rpc_clnt *rpc = NFS_SB(sb)->client; - - /* -EIO all pending I/O */ - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = NFS_SB(sb)->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); + return NFS_STALE(inode) || generic_drop_inode(inode); } +EXPORT_SYMBOL_GPL(nfs_drop_inode); - -static inline unsigned long -nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +void nfs_clear_inode(struct inode *inode) { - /* make sure blocksize is a power of two */ - if ((bsize & (bsize - 1)) || nrbitsp) { - unsigned char nrbits; - - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) - ; - bsize = 1 << nrbits; - if (nrbitsp) - *nrbitsp = nrbits; - } - - return bsize; -} - -/* - * Calculate the number of 512byte blocks used. - */ -static inline unsigned long -nfs_calc_block_size(u64 tsize) -{ - loff_t used = (tsize + 511) >> 9; - return (used > ULONG_MAX) ? ULONG_MAX : used; + /* + * The following should never happen... + */ + WARN_ON_ONCE(nfs_have_writebacks(inode)); + WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); + nfs_zap_acl_cache(inode); + nfs_access_zap_cache(inode); + nfs_fscache_clear_inode(inode); } +EXPORT_SYMBOL_GPL(nfs_clear_inode); -/* - * Compute and set NFS server blocksize - */ -static inline unsigned long -nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +void nfs_evict_inode(struct inode *inode) { - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; - - return nfs_block_bits(bsize, nrbitsp); + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); + nfs_clear_inode(inode); } -/* - * Obtain the root inode of the file system. +/** + * nfs_sync_mapping - helper to flush all mmapped dirty data to disk */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +int nfs_sync_mapping(struct address_space *mapping) { - struct nfs_server *server = NFS_SB(sb); - struct inode *rooti; - int error; - - error = server->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); - } - - rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); - if (!rooti) - return ERR_PTR(-ENOMEM); - return rooti; -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? */ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; - } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; - } - sb->s_root->d_op = server->rpc_ops->dentry_ops; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } - - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags |= MS_SYNCHRONOUS; - } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; - - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; - - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); - return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; -} - -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) -{ - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} - -/* - * Create an RPC client handle. - */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; - - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return (struct rpc_clnt *)xprt; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; - } - - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; - - return clnt; - -out_fail: - return clnt; -} - -/* - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. - */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) -{ - struct nfs_server *server; - rpc_authflavor_t authflavor; - - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. */ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif + int ret = 0; - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } - if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. - */ - sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; - sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); } - - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); -} - -static int -nfs_statfs(struct super_block *sb, struct kstatfs *buf) -{ - struct nfs_server *server = NFS_SB(sb); - unsigned char blockbits; - unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); - struct nfs_fattr fattr; - struct nfs_fsstat res = { - .fattr = &fattr, - }; - int error; - - lock_kernel(); - - error = server->rpc_ops->statfs(server, rootfh, &res); - buf->f_type = NFS_SUPER_MAGIC; - if (error < 0) - goto out_err; - - /* - * Current versions of glibc do not correctly handle the - * case where f_frsize != f_bsize. Eventually we want to - * report the value of wtmult in this field. - */ - buf->f_frsize = sb->s_blocksize; - - /* - * On most *nix systems, f_blocks, f_bfree, and f_bavail - * are reported in units of f_frsize. Linux hasn't had - * an f_frsize field in its statfs struct until recently, - * thus historically Linux's sys_statfs reports these - * fields in units of f_bsize. - */ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; - blockres = (1 << blockbits) - 1; - buf->f_blocks = (res.tbytes + blockres) >> blockbits; - buf->f_bfree = (res.fbytes + blockres) >> blockbits; - buf->f_bavail = (res.abytes + blockres) >> blockbits; - - buf->f_files = res.tfiles; - buf->f_ffree = res.afiles; - - buf->f_namelen = server->namelen; - out: - unlock_kernel(); - - return 0; - - out_err: - printk(KERN_WARNING "nfs_statfs: statfs error = %d\n", -error); - buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; - goto out; - + return ret; } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { - static struct proc_nfs_info { - int flag; - char *str; - char *nostr; - } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, - { NFS_MOUNT_NOACL, ",noacl", "" }, - { 0, NULL, NULL } - }; - struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - char buf[12]; - char *proto; - - seq_printf(m, ",v%d", nfss->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) - seq_puts(m, nfs_infop->str); - else - seq_puts(m, nfs_infop->nostr); - } - switch (nfss->client->cl_xprt->prot) { - case IPPROTO_TCP: - proto = "tcp"; - break; - case IPPROTO_UDP: - proto = "udp"; - break; - default: - snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); - proto = buf; - } - seq_printf(m, ",proto=%s", proto); - seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); - return 0; + struct nfs_inode *nfsi = NFS_I(inode); + + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity |= flags; + if (flags & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); } /* * Invalidate the local caches */ -void -nfs_zap_caches(struct inode *inode) +static void nfs_zap_caches_locked(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; - spin_lock(&inode->i_lock); + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - else - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE); + } else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE); + nfs_zap_label_cache_locked(nfsi); +} +void nfs_zap_caches(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } -static void nfs_zap_acl_cache(struct inode *inode) +void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) +{ + if (mapping->nrpages != 0) { + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + spin_unlock(&inode->i_lock); + } +} + +void nfs_zap_acl_cache(struct inode *inode) { void (*clear_acl_cache)(struct inode *); @@ -674,18 +213,24 @@ static void nfs_zap_acl_cache(struct inode *inode) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); + +void nfs_invalidate_atime(struct inode *inode) +{ + spin_lock(&inode->i_lock); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_invalidate_atime); /* - * Invalidate, but do not unhash, the inode + * Invalidate, but do not unhash, the inode. + * NB: must be called with inode->i_lock held! */ -static void -nfs_invalidate_inode(struct inode *inode) +static void nfs_invalidate_inode(struct inode *inode) { - umode_t save_mode = inode->i_mode; - - make_bad_inode(inode); - inode->i_mode = save_mode; - nfs_zap_caches(inode); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_zap_caches_locked(inode); } struct nfs_find_desc { @@ -708,6 +253,8 @@ nfs_find_actor(struct inode *inode, void *opaque) if (NFS_FILEID(inode) != fattr->fileid) return 0; + if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode)) + return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; if (is_bad_inode(inode) || NFS_STALE(inode)) @@ -721,43 +268,106 @@ nfs_init_locked(struct inode *inode, void *opaque) struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; struct nfs_fattr *fattr = desc->fattr; - NFS_FILEID(inode) = fattr->fileid; + set_nfs_fileid(inode, fattr->fileid); nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } -/* Don't use READDIRPLUS on directories that we believe are too large */ -#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +static void nfs_clear_label_invalid(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; + spin_unlock(&inode->i_lock); +} + +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ + int error; + + if (label == NULL) + return; + + if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { + error = security_inode_notifysecctx(inode, label->label, + label->len); + if (error) + printk(KERN_ERR "%s() %s %d " + "security_inode_notifysecctx() %d\n", + __func__, + (char *)label->label, + label->len, error); + nfs_clear_label_invalid(inode); + } +} + +struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) +{ + struct nfs4_label *label = NULL; + int minor_version = server->nfs_client->cl_minorversion; + + if (minor_version < 2) + return label; + + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + return label; + + label = kzalloc(sizeof(struct nfs4_label), flags); + if (label == NULL) + return ERR_PTR(-ENOMEM); + + label->label = kzalloc(NFS4_MAXLABELLEN, flags); + if (label->label == NULL) { + kfree(label); + return ERR_PTR(-ENOMEM); + } + label->len = NFS4_MAXLABELLEN; + + return label; +} +EXPORT_SYMBOL_GPL(nfs4_label_alloc); +#else +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ +} +#endif +EXPORT_SYMBOL_GPL(nfs_setsecurity); /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. */ struct inode * -nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_find_desc desc = { .fh = fh, .fattr = fattr }; - struct inode *inode = NULL; + struct inode *inode = ERR_PTR(-ENOENT); unsigned long hash; - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - goto out_no_inode; + nfs_attr_check_mountpoint(sb, fattr); - if (!fattr->nlink) { - printk("NFS: Buggy server - nlink == 0!\n"); + if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) && + !nfs_attr_use_mounted_on_fileid(fattr)) + goto out_no_inode; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) goto out_no_inode; - } hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) + inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc); + if (inode == NULL) { + inode = ERR_PTR(-ENOMEM); goto out_no_inode; + } if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; /* We set i_ino for the few things that still rely on it, * such as stat(2) */ @@ -766,75 +376,133 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 + && nfs_server_capable(inode, NFS_CAP_MODE)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { - inode->i_fop = &nfs_file_operations; + inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; - if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) - && fattr->size <= NFS_LIMIT_READDIRPLUS) - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + inode->i_data.a_ops = &nfs_dir_aops; + /* Deal with crossing mountpoints */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || + fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + inode->i_op = &nfs_referral_inode_operations; + else + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + inode->i_flags |= S_AUTOMOUNT; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else init_special_inode(inode, inode->i_mode, fattr->rdev); + memset(&inode->i_atime, 0, sizeof(inode->i_atime)); + memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); + memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); + inode->i_version = 0; + inode->i_size = 0; + clear_nlink(inode); + inode->i_uid = make_kuid(&init_user_ns, -2); + inode->i_gid = make_kgid(&init_user_ns, -2); + inode->i_blocks = 0; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->write_io = 0; + nfsi->read_io = 0; + nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = jiffies; - inode->i_atime = fattr->atime; - inode->i_mtime = fattr->mtime; - inode->i_ctime = fattr->ctime; - if (fattr->valid & NFS_ATTR_FATTR_V4) - nfsi->change_attr = fattr->change_attr; - inode->i_size = nfs_size_to_loff_t(fattr->size); - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; - if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + nfsi->attr_gencount = fattr->gencount; + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + inode->i_atime = fattr->atime; + else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_MTIME) + inode->i_mtime = fattr->mtime; + else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_CTIME) + inode->i_ctime = fattr->ctime; + else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) + inode->i_version = fattr->change_attr; + else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_SIZE) + inode->i_size = nfs_size_to_loff_t(fattr->size); + else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE); + if (fattr->valid & NFS_ATTR_FATTR_NLINK) + set_nlink(inode, fattr->nlink); + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_OWNER) + inode->i_uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_GROUP) + inode->i_gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; - } else { - inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } + + nfs_setsecurity(inode, fattr, label); + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - nfsi->cache_access.cred = NULL; + nfsi->attrtimeo_timestamp = now; + nfsi->access_cache = RB_ROOT; + + nfs_fscache_init_inode(inode); unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + (unsigned long long)NFS_FILEID(inode), + nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); out: return inode; out_no_inode: - printk("nfs_fhget: iget failed\n"); + dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode)); goto out; } +EXPORT_SYMBOL_GPL(nfs_fhget); -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int nfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; - struct nfs_fattr fattr; - int error; + struct nfs_fattr *fattr; + int error = -ENOMEM; + + nfs_inc_stats(inode, NFSIOS_VFSSETATTR); + + /* skip mode change if it's just for clearing setuid/setgid */ + if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) + attr->ia_valid &= ~ATTR_MODE; if (attr->ia_valid & ATTR_SIZE) { if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) @@ -843,29 +511,63 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if (attr->ia_valid == 0) + if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; - lock_kernel(); - nfs_begin_data_update(inode); - /* Write all dirty data if we're changing file permissions or size */ - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); + trace_nfs_setattr_enter(inode); + + /* Write all dirty data */ + if (S_ISREG(inode->i_mode)) { + nfs_inode_dio_wait(inode); nfs_wb_all(inode); } + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + goto out; /* * Return any delegations if we're going to change ACLs */ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - nfs_inode_return_delegation(inode); - error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); + NFS_PROTO(inode)->return_delegation(inode); + error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) - nfs_refresh_inode(inode, &fattr); - nfs_end_data_update(inode); - unlock_kernel(); + error = nfs_refresh_inode(inode, fattr); + nfs_free_fattr(fattr); +out: + trace_nfs_setattr_exit(inode, error); return error; } +EXPORT_SYMBOL_GPL(nfs_setattr); + +/** + * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * This is a copy of the common vmtruncate, but with the locking + * corrected to take into account the fact that NFS requires + * inode->i_size to be updated under the inode->i_lock. + */ +static int nfs_vmtruncate(struct inode * inode, loff_t offset) +{ + int err; + + err = inode_newsize_ok(inode, offset); + if (err) + goto out; + + spin_lock(&inode->i_lock); + i_size_write(inode, offset); + /* Optimisation */ + if (offset == 0) + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + + truncate_pagecache(inode, offset); +out: + return err; +} /** * nfs_setattr_update_inode - Update inode metadata after a setattr call. @@ -878,6 +580,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) { if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + spin_lock(&inode->i_lock); if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -887,49 +590,34 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL); spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { - inode->i_size = attr->ia_size; - vmtruncate(inode, attr->ia_size); + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); + nfs_vmtruncate(inode, attr->ia_size); } } +EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); -static int nfs_wait_schedule(void *word) +static void nfs_request_parent_use_readdirplus(struct dentry *dentry) { - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - -/* - * Wait for the inode to get unlocked. - */ -static int nfs_wait_on_inode(struct inode *inode) -{ - struct rpc_clnt *clnt = NFS_CLIENT(inode); - struct nfs_inode *nfsi = NFS_I(inode); - sigset_t oldmask; - int error; - - rpc_clnt_sigmask(clnt, &oldmask); - error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, - nfs_wait_schedule, TASK_INTERRUPTIBLE); - rpc_clnt_sigunmask(clnt, &oldmask); + struct dentry *parent; - return error; + parent = dget_parent(dentry); + nfs_force_use_readdirplus(parent->d_inode); + dput(parent); } -static void nfs_wake_up_inode(struct inode *inode) +static bool nfs_need_revalidate_inode(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - - clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); + if (NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + return true; + if (nfs_attribute_cache_expired(inode)) + return true; + return false; } int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) @@ -938,81 +626,226 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - if (__IS_FLG(inode, MS_NOATIME)) - need_atime = 0; - else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + trace_nfs_getattr_enter(inode); + /* Flush out writes to the server in order to update c/mtime. */ + if (S_ISREG(inode->i_mode)) { + nfs_inode_dio_wait(inode); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; + } + + /* + * We may force a getattr if the user cares about atime. + * + * Note that we only have to check the vfsmount flags here: + * - NFS always sets S_NOATIME by so checking it would give a + * bogus result + * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * no point in checking those. + */ + if ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - /* We may force a getattr if the user cares about atime */ - if (need_atime) - err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - else - err = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!err) + + if (need_atime || nfs_need_revalidate_inode(inode)) { + struct nfs_server *server = NFS_SERVER(inode); + + if (server->caps & NFS_CAP_READDIRPLUS) + nfs_request_parent_use_readdirplus(dentry); + err = __nfs_revalidate_inode(server, inode); + } + if (!err) { generic_fillattr(inode, stat); + stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + } +out: + trace_nfs_getattr_exit(inode, err); return err; } +EXPORT_SYMBOL_GPL(nfs_getattr); + +static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) +{ + atomic_set(&l_ctx->count, 1); + l_ctx->lockowner.l_owner = current->files; + l_ctx->lockowner.l_pid = current->tgid; + INIT_LIST_HEAD(&l_ctx->list); + nfs_iocounter_init(&l_ctx->io_count); +} + +static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) +{ + struct nfs_lock_context *head = &ctx->lock_context; + struct nfs_lock_context *pos = head; + + do { + if (pos->lockowner.l_owner != current->files) + continue; + if (pos->lockowner.l_pid != current->tgid) + continue; + atomic_inc(&pos->count); + return pos; + } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); + return NULL; +} + +struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) +{ + struct nfs_lock_context *res, *new = NULL; + struct inode *inode = ctx->dentry->d_inode; + + spin_lock(&inode->i_lock); + res = __nfs_find_lock_context(ctx); + if (res == NULL) { + spin_unlock(&inode->i_lock); + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + nfs_init_lock_context(new); + spin_lock(&inode->i_lock); + res = __nfs_find_lock_context(ctx); + if (res == NULL) { + list_add_tail(&new->list, &ctx->lock_context.list); + new->open_context = ctx; + res = new; + new = NULL; + } + } + spin_unlock(&inode->i_lock); + kfree(new); + return res; +} + +void nfs_put_lock_context(struct nfs_lock_context *l_ctx) +{ + struct nfs_open_context *ctx = l_ctx->open_context; + struct inode *inode = ctx->dentry->d_inode; + + if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) + return; + list_del(&l_ctx->list); + spin_unlock(&inode->i_lock); + kfree(l_ctx); +} + +/** + * nfs_close_context - Common close_context() routine NFSv2/v3 + * @ctx: pointer to context + * @is_sync: is this a synchronous close + * + * always ensure that the attributes are up to date if we're mounted + * with close-to-open semantics + */ +void nfs_close_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode; + struct nfs_server *server; + + if (!(ctx->mode & FMODE_WRITE)) + return; + if (!is_sync) + return; + inode = ctx->dentry->d_inode; + if (!list_empty(&NFS_I(inode)->open_files)) + return; + server = NFS_SERVER(inode); + if (server->flags & NFS_MOUNT_NOCTO) + return; + nfs_revalidate_inode(server, inode); +} +EXPORT_SYMBOL_GPL(nfs_close_context); -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) { struct nfs_open_context *ctx; + struct rpc_cred *cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return ERR_CAST(cred); - ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); - if (ctx != NULL) { - atomic_set(&ctx->count, 1); - ctx->dentry = dget(dentry); - ctx->cred = get_rpccred(cred); - ctx->state = NULL; - ctx->lockowner = current->files; - ctx->error = 0; - ctx->dir_cookie = 0; + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + put_rpccred(cred); + return ERR_PTR(-ENOMEM); } + nfs_sb_active(dentry->d_sb); + ctx->dentry = dget(dentry); + ctx->cred = cred; + ctx->state = NULL; + ctx->mode = f_mode; + ctx->flags = 0; + ctx->error = 0; + nfs_init_lock_context(&ctx->lock_context); + ctx->lock_context.open_context = ctx; + INIT_LIST_HEAD(&ctx->list); + ctx->mdsthreshold = NULL; return ctx; } +EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->count); + atomic_inc(&ctx->lock_context.count); return ctx; } +EXPORT_SYMBOL_GPL(get_nfs_open_context); + +static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) +{ + struct inode *inode = ctx->dentry->d_inode; + struct super_block *sb = ctx->dentry->d_sb; + + if (!list_empty(&ctx->list)) { + if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } else if (!atomic_dec_and_test(&ctx->lock_context.count)) + return; + if (inode != NULL) + NFS_PROTO(inode)->close_context(ctx, is_sync); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->dentry); + nfs_sb_deactive(sb); + kfree(ctx->mdsthreshold); + kfree(ctx); +} void put_nfs_open_context(struct nfs_open_context *ctx) { - if (atomic_dec_and_test(&ctx->count)) { - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } - if (ctx->state != NULL) - nfs4_close_state(ctx->state, ctx->mode); - if (ctx->cred != NULL) - put_rpccred(ctx->cred); - dput(ctx->dentry); - kfree(ctx); - } + __put_nfs_open_context(ctx, 0); } +EXPORT_SYMBOL_GPL(put_nfs_open_context); /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +void nfs_inode_attach_open_context(struct nfs_open_context *ctx) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - filp->private_data = get_nfs_open_context(ctx); spin_lock(&inode->i_lock); list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); + +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +{ + filp->private_data = get_nfs_open_context(ctx); + if (list_empty(&ctx->list)) + nfs_inode_attach_open_context(ctx); +} +EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* * Given an inode, search for an open context with the desired characteristics */ -struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode) +struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; @@ -1021,26 +854,27 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c list_for_each_entry(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; - if ((pos->mode & mode) == mode) { - ctx = get_nfs_open_context(pos); - break; - } + if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) + continue; + ctx = get_nfs_open_context(pos); + break; } spin_unlock(&inode->i_lock); return ctx; } -void nfs_file_clear_open_context(struct file *filp) +static void nfs_file_clear_open_context(struct file *filp) { - struct inode *inode = filp->f_dentry->d_inode; - struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { + struct inode *inode = ctx->dentry->d_inode; + filp->private_data = NULL; spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); - put_nfs_open_context(ctx); + __put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1); } } @@ -1050,18 +884,13 @@ void nfs_file_clear_open_context(struct file *filp) int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - struct rpc_cred *cred; - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_dentry, cred); - put_rpccred(cred); - if (ctx == NULL) - return -ENOMEM; - ctx->mode = filp->f_mode; + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); + nfs_fscache_open_file(inode, filp); return 0; } @@ -1079,83 +908,68 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; - struct nfs_fattr fattr; + struct nfs4_label *label = NULL; + struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); - unsigned long verifier; - unsigned long cache_validity; - dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", - inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n", + inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); + + trace_nfs_revalidate_inode_enter(inode); - lock_kernel(); - if (!inode || is_bad_inode(inode)) - goto out_nowait; + if (is_bad_inode(inode)) + goto out; if (NFS_STALE(inode)) - goto out_nowait; + goto out; - status = nfs_wait_on_inode(inode); - if (status < 0) + status = -ENOMEM; + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + goto out; + + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); + + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (IS_ERR(label)) { + status = PTR_ERR(label); goto out; - if (NFS_STALE(inode)) { - status = -ESTALE; - /* Do we trust the cached ESTALE? */ - if (NFS_ATTRTIMEO(inode) != 0) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { - /* no */ - } else - goto out; - } } - /* Protect against RPC races by saving the change attribute */ - verifier = nfs_save_change_attribute(inode); - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); if (status != 0) { - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); + (unsigned long long)NFS_FILEID(inode), status); if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } - goto out; + goto err_out; } - spin_lock(&inode->i_lock); - status = nfs_update_inode(inode, &fattr, verifier); + status = nfs_refresh_inode(inode, fattr); if (status) { - spin_unlock(&inode->i_lock); - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); - goto out; + (unsigned long long)NFS_FILEID(inode), status); + goto err_out; } - cache_validity = nfsi->cache_validity; - nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; - /* - * We may need to keep the attributes marked as invalid if - * we raced with nfs_end_attr_update(). - */ - if (time_after_eq(verifier, nfsi->cache_change_attribute)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); - spin_unlock(&inode->i_lock); - - nfs_revalidate_mapping(inode, inode->i_mapping); - - if (cache_validity & NFS_INO_INVALID_ACL) + if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); + nfs_setsecurity(inode, fattr, label); - out: - nfs_wake_up_inode(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode)); - out_nowait: - unlock_kernel(); +err_out: + nfs4_label_free(label); +out: + nfs_free_fattr(fattr); + trace_nfs_revalidate_inode_exit(inode, status); return status; } @@ -1163,9 +977,14 @@ int nfs_attribute_timeout(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfs_have_delegation(inode, FMODE_READ)) + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + +int nfs_attribute_cache_expired(struct inode *inode) +{ + if (nfs_have_delegated_attributes(inode)) return 0; - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + return nfs_attribute_timeout(inode); } /** @@ -1177,75 +996,152 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) - && !nfs_attribute_timeout(inode)) + if (!nfs_need_revalidate_inode(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); } +EXPORT_SYMBOL_GPL(nfs_revalidate_inode); -/** - * nfs_revalidate_mapping - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(mapping) == 0) - filemap_fdatawait(mapping); - nfs_wb_all(inode); + ret = nfs_sync_mapping(mapping); + if (ret < 0) + return ret; } - invalidate_inode_pages2(mapping); - + ret = invalidate_inode_pages2(mapping); + if (ret < 0) + return ret; + } + if (S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) { - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - /* This ensures we revalidate child dentries */ - nfsi->cache_change_attribute = jiffies; - } + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); - - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); } + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + nfs_fscache_wait_on_invalidate(inode); + + dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n", + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode)); + return 0; } -/** - * nfs_begin_data_update - * @inode - pointer to inode - * Declare that a set of operations will update file data on the server - */ -void nfs_begin_data_update(struct inode *inode) +static bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - atomic_inc(&NFS_I(inode)->data_updates); + if (nfs_have_delegated_attributes(inode)) + return false; + return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) + || NFS_STALE(inode); } /** - * nfs_end_data_update - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will mark the inode as immediately needing revalidation - * of its attribute cache. + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping */ -void nfs_end_data_update(struct inode *inode) +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long *bitlock = &nfsi->flags; + int ret = 0; + + /* swapfiles are not supposed to be shared. */ + if (IS_SWAPFILE(inode)) + goto out; - if (!nfs_have_delegation(inode, FMODE_READ)) { - /* Directories and symlinks: invalidate page cache */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + if (nfs_mapping_need_revalidate_inode(inode)) { + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; + } + + /* + * We must clear NFS_INO_INVALID_DATA first to ensure that + * invalidations that come in while we're shooting down the mappings + * are respected. But, that leaves a race window where one revalidator + * can clear the flag, and then another checks it before the mapping + * gets invalidated. Fix that by serializing access to this part of + * the function. + * + * At the same time, we need to allow other tasks to see whether we + * might be in the middle of invalidating the pages, so we only set + * the bit lock here if it looks like we're going to be doing that. + */ + for (;;) { + ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + goto out; + spin_lock(&inode->i_lock); + if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); + continue; } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + break; + spin_unlock(&inode->i_lock); + goto out; } - nfsi->cache_change_attribute = jiffies; - atomic_dec(&nfsi->data_updates); + + set_bit(NFS_INO_INVALIDATING, bitlock); + smp_wmb(); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + trace_nfs_invalidate_mapping_enter(inode); + ret = nfs_invalidate_mapping(inode, mapping); + trace_nfs_invalidate_mapping_exit(inode, ret); + + clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_INO_INVALIDATING); +out: + return ret; +} + +static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long ret = 0; + + if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) + && (fattr->valid & NFS_ATTR_FATTR_CHANGE) + && inode->i_version == fattr->pre_change_attr) { + inode->i_version = fattr->change_attr; + if (S_ISDIR(inode->i_mode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + ret |= NFS_INO_INVALID_ATTR; + } + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) + && (fattr->valid & NFS_ATTR_FATTR_CTIME) + && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + ret |= NFS_INO_INVALID_ATTR; + } + + if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) + && (fattr->valid & NFS_ATTR_FATTR_MTIME) + && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + if (S_ISDIR(inode->i_mode)) + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); + ret |= NFS_INO_INVALID_ATTR; + } + if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) + && (fattr->valid & NFS_ATTR_FATTR_SIZE) + && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) + && nfsi->npages == 0) { + i_size_write(inode, nfs_size_to_loff_t(fattr->size)); + ret |= NFS_INO_INVALID_ATTR; + } + + return ret; } /** @@ -1261,73 +1157,235 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; - int data_unstable; - - - /* Are we in the process of updating data on the server? */ - data_unstable = nfs_caches_unstable(inode); + unsigned long invalid = 0; - if (fattr->valid & NFS_ATTR_FATTR_V4) { - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 - && nfsi->change_attr == fattr->pre_change_attr) - nfsi->change_attr = fattr->change_attr; - if (nfsi->change_attr != fattr->change_attr) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } - } - if ((fattr->valid & NFS_ATTR_FATTR) == 0) { + if (nfs_have_delegated_attributes(inode)) return 0; - } - /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) + return -EIO; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) return -EIO; - } - - cur_size = i_size_read(inode); - new_isize = nfs_size_to_loff_t(fattr->size); - /* If we have atomic WCC data, we may update some attributes */ - if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + inode->i_version != fattr->change_attr) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } - if (cur_size != new_isize) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (nfsi->npages == 0) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) + invalid |= NFS_INO_INVALID_ATTR; + + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + if (cur_size != new_isize && nfsi->npages == 0) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } /* Have any file permissions changed? */ - if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) - || inode->i_uid != fattr->uid - || inode->i_gid != fattr->gid) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) + invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ - if (inode->i_nlink != fattr->nlink) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink) + invalid |= NFS_INO_INVALID_ATTR; + + if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime)) + invalid |= NFS_INO_INVALID_ATIME; - if (!timespec_equal(&inode->i_atime, &fattr->atime)) - nfsi->cache_validity |= NFS_INO_INVALID_ATIME; + if (invalid != 0) + nfs_set_cache_invalid(inode, invalid); nfsi->read_cache_jiffies = fattr->time_start; return 0; } +static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_CTIME)) + return 0; + return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; +} + +static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return 0; + return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); +} + +static atomic_long_t nfs_attr_generation_counter; + +static unsigned long nfs_read_attr_generation_counter(void) +{ + return atomic_long_read(&nfs_attr_generation_counter); +} + +unsigned long nfs_inc_attr_generation_counter(void) +{ + return atomic_long_inc_return(&nfs_attr_generation_counter); +} + +void nfs_fattr_init(struct nfs_fattr *fattr) +{ + fattr->valid = 0; + fattr->time_start = jiffies; + fattr->gencount = nfs_inc_attr_generation_counter(); + fattr->owner_name = NULL; + fattr->group_name = NULL; +} +EXPORT_SYMBOL_GPL(nfs_fattr_init); + +struct nfs_fattr *nfs_alloc_fattr(void) +{ + struct nfs_fattr *fattr; + + fattr = kmalloc(sizeof(*fattr), GFP_NOFS); + if (fattr != NULL) + nfs_fattr_init(fattr); + return fattr; +} +EXPORT_SYMBOL_GPL(nfs_alloc_fattr); + +struct nfs_fh *nfs_alloc_fhandle(void) +{ + struct nfs_fh *fh; + + fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS); + if (fh != NULL) + fh->size = 0; + return fh; +} +EXPORT_SYMBOL_GPL(nfs_alloc_fhandle); + +#ifdef NFS_DEBUG +/* + * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle + * in the same way that wireshark does + * + * @fh: file handle + * + * For debugging only. + */ +u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) +{ + /* wireshark uses 32-bit AUTODIN crc and does a bitwise + * not on the result */ + return nfs_fhandle_hash(fh); +} +EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash); + +/* + * _nfs_display_fhandle - display an NFS file handle on the console + * + * @fh: file handle to display + * @caption: display caption + * + * For debugging only. + */ +void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) +{ + unsigned short i; + + if (fh == NULL || fh->size == 0) { + printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); + return; + } + + printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", + caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); + for (i = 0; i < fh->size; i += 16) { + __be32 *pos = (__be32 *)&fh->data[i]; + + switch ((fh->size - i - 1) >> 2) { + case 0: + printk(KERN_DEFAULT " %08x\n", + be32_to_cpup(pos)); + break; + case 1: + printk(KERN_DEFAULT " %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1)); + break; + case 2: + printk(KERN_DEFAULT " %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2)); + break; + default: + printk(KERN_DEFAULT " %08x %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); + } + } +} +EXPORT_SYMBOL_GPL(_nfs_display_fhandle); +#endif + +/** + * nfs_inode_attrs_need_update - check if the inode attributes need updating + * @inode - pointer to inode + * @fattr - attributes + * + * Attempt to divine whether or not an RPC call reply carrying stale + * attributes got scheduled after another call carrying updated ones. + * + * To do so, the function first assumes that a more recent ctime means + * that the attributes in fattr are newer, however it also attempt to + * catch the case where ctime either didn't change, or went backwards + * (if someone reset the clock on the server) by looking at whether + * or not this RPC call was started after the inode was last updated. + * Note also the check for wraparound of 'attr_gencount' + * + * The function returns 'true' if it thinks the attributes in 'fattr' are + * more recent than the ones cached in the inode. + * + */ +static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + const struct nfs_inode *nfsi = NFS_I(inode); + + return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || + nfs_ctime_need_update(inode, fattr) || + nfs_size_need_update(inode, fattr) || + ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); +} + +/* + * Don't trust the change_attribute, mtime, ctime or size if + * a pnfs LAYOUTCOMMIT is outstanding + */ +static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode, + struct nfs_fattr *fattr) +{ + if (pnfs_layoutcommit_outstanding(inode)) + fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE | + NFS_ATTR_FATTR_MTIME | + NFS_ATTR_FATTR_CTIME | + NFS_ATTR_FATTR_SIZE); +} + +static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + int ret; + + trace_nfs_refresh_inode_enter(inode); + + nfs_inode_attrs_handle_layoutcommit(inode, fattr); + + if (nfs_inode_attrs_need_update(inode, fattr)) + ret = nfs_update_inode(inode, fattr); + else + ret = nfs_check_inode_attributes(inode, fattr); + + trace_nfs_refresh_inode_exit(inode, ret); + return ret; +} + /** * nfs_refresh_inode - try to update the inode attribute cache * @inode - pointer to inode @@ -1340,23 +1398,29 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat */ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); int status; if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; - if (nfs_verify_change_attribute(inode, fattr->time_start)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); - if (time_after(fattr->time_start, nfsi->last_updated)) - status = nfs_update_inode(inode, fattr, fattr->time_start); - else - status = nfs_check_inode_attributes(inode, fattr); - + status = nfs_refresh_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); + return status; } +EXPORT_SYMBOL_GPL(nfs_refresh_inode); + +static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + + if (S_ISDIR(inode->i_mode)) + invalid |= NFS_INO_INVALID_DATA; + nfs_set_cache_invalid(inode, invalid); + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + return nfs_refresh_inode_locked(inode, fattr); +} /** * nfs_post_op_update_inode - try to update the inode attribute cache @@ -1365,25 +1429,76 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) * * After an operation that has changed the inode metadata, mark the * attribute cache as being invalid, then try to update it. + * + * NB: if the server didn't return any post op attributes, this + * function will force the retrieval of attributes before the next + * NFS request. Thus it should be used only for operations that + * are expected to change one or more attributes, to avoid + * unnecessary NFS requests and trips through nfs_update_inode(). */ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); - int status = 0; + int status; spin_lock(&inode->i_lock); - if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; - goto out; - } - status = nfs_update_inode(inode, fattr, fattr->time_start); - if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute)) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); - nfsi->cache_change_attribute = jiffies; -out: + status = nfs_post_op_update_inode_locked(inode, fattr); + spin_unlock(&inode->i_lock); + + return status; +} +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + /* Don't do a WCC update if these attributes are already stale */ + if ((fattr->valid & NFS_ATTR_FATTR) == 0 || + !nfs_inode_attrs_need_update(inode, fattr)) { + fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE + | NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PREMTIME + | NFS_ATTR_FATTR_PRECTIME); + goto out_noforce; + } + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { + fattr->pre_change_attr = inode->i_version; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } + if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) { + memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); + fattr->valid |= NFS_ATTR_FATTR_PRECTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) { + memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); + fattr->valid |= NFS_ATTR_FATTR_PREMTIME; + } + if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 && + (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) { + fattr->pre_size = i_size_read(inode); + fattr->valid |= NFS_ATTR_FATTR_PRESIZE; + } +out_noforce: + status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } +EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc); /* * Many nfs protocol calls return the new file attributes after @@ -1397,735 +1512,349 @@ out: * * A very similar scenario holds for the dir cache. */ -static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; - unsigned int invalid = 0; - int data_unstable; + unsigned long invalid = 0; + unsigned long now = jiffies; + unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", - __FUNCTION__, inode->i_sb->s_id, inode->i_ino, + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", + __func__, inode->i_sb->s_id, inode->i_ino, + nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - - if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "%s: inode number mismatch\n" - "expected (%s/0x%Lx), got (%s/0x%Lx)\n", - __FUNCTION__, - inode->i_sb->s_id, (long long)nfsi->fileid, - inode->i_sb->s_id, (long long)fattr->fileid); + if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) { + printk(KERN_ERR "NFS: server %s error: fileid changed\n" + "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + inode->i_sb->s_id, (long long)nfsi->fileid, + (long long)fattr->fileid); goto out_err; } /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) - goto out_changed; + if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + /* + * Big trouble! The inode has become a different object. + */ + printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n", + __func__, inode->i_ino, inode->i_mode, fattr->mode); + goto out_err; + } + + server = NFS_SERVER(inode); + /* Update the fsid? */ + if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && + !nfs_fsid_equal(&server->fsid, &fattr->fsid) && + !IS_AUTOMOUNT(inode)) + server->fsid = fattr->fsid; /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = jiffies; - - /* Are we racing with known updates of the metadata on the server? */ - data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || - (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)); - /* Check if our cached file size is stale */ - new_isize = nfs_size_to_loff_t(fattr->size); - cur_isize = i_size_read(inode); - if (new_isize != cur_isize) { - /* Do we perhaps have any outstanding writes? */ - if (nfsi->npages == 0) { - /* No, but did we race with nfs_end_data_update()? */ - if (time_after_eq(verifier, nfsi->cache_change_attribute)) { - inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_DATA; - } - invalid |= NFS_INO_INVALID_ATTR; - } else if (new_isize > cur_isize) { - inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + save_cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED + | NFS_INO_REVAL_PAGECACHE); + + /* Do atomic weak cache consistency updates */ + invalid |= nfs_wcc_update_inode(inode, fattr); + + /* More cache consistency checks */ + if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { + if (inode->i_version != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE; + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); + inode->i_version = fattr->change_attr; } - dprintk("NFS: isize change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - } + } else if (server->caps & NFS_CAP_CHANGE_ATTR) + nfsi->cache_validity |= save_cache_validity; - /* Check if the mtime agrees */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - dprintk("NFS: mtime change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - } + } else if (server->caps & NFS_CAP_MTIME) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); - if ((fattr->valid & NFS_ATTR_FATTR_V4) - && nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - } - - /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { - if (!data_unstable) - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } - memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + } else if (server->caps & NFS_CAP_CTIME) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); - if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || - inode->i_uid != fattr->uid || - inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - - inode->i_mode = fattr->mode; - inode->i_nlink = fattr->nlink; - inode->i_uid = fattr->uid; - inode->i_gid = fattr->gid; + /* Check if our cached file size is stale */ + if (fattr->valid & NFS_ATTR_FATTR_SIZE) { + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes, or has + * the file grown beyond our last write? */ + if ((nfsi->npages == 0) || new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid &= ~NFS_INO_REVAL_PAGECACHE; + } + dprintk("NFS: isize change on server for file %s/%ld " + "(%Ld to %Ld)\n", + inode->i_sb->s_id, + inode->i_ino, + (long long)cur_isize, + (long long)new_isize); + } + } else + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + + + if (fattr->valid & NFS_ATTR_FATTR_ATIME) + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + else if (server->caps & NFS_CAP_ATIME) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_MODE) { + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + umode_t newmode = inode->i_mode & S_IFMT; + newmode |= fattr->mode & S_IALLUGO; + inode->i_mode = newmode; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + } + } else if (server->caps & NFS_CAP_MODE) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { + if (!uid_eq(inode->i_uid, fattr->uid)) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_uid = fattr->uid; + } + } else if (server->caps & NFS_CAP_OWNER) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { + if (!gid_eq(inode->i_gid, fattr->gid)) { + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + inode->i_gid = fattr->gid; + } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + + if (fattr->valid & NFS_ATTR_FATTR_NLINK) { + if (inode->i_nlink != fattr->nlink) { + invalid |= NFS_INO_INVALID_ATTR; + if (S_ISDIR(inode->i_mode)) + invalid |= NFS_INO_INVALID_DATA; + set_nlink(inode, fattr->nlink); + } + } else if (server->caps & NFS_CAP_NLINK) + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); - if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - inode->i_blksize = inode->i_sb->s_blocksize; - } else { - inode->i_blocks = fattr->du.nfs2.blocks; - inode->i_blksize = fattr->du.nfs2.blocksize; } + if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + inode->i_blocks = fattr->du.nfs2.blocks; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; - } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; + nfsi->attrtimeo_timestamp = now; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); + } else { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; + } } + invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - if (!nfs_have_delegation(inode, FMODE_READ)) - nfsi->cache_validity |= invalid; + if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || + (save_cache_validity & NFS_INO_REVAL_FORCED)) + nfs_set_cache_invalid(inode, invalid); return 0; - out_changed: - /* - * Big trouble! The inode has become a different object. - */ -#ifdef NFS_PARANOIA - printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", - __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); -#endif + out_err: /* * No need to worry about unhashing the dentry, as the * lookup validation will know that the inode is bad. * (But we fall through to invalidate the caches.) */ nfs_invalidate_inode(inode); - out_err: - set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); return -ESTALE; } -/* - * File system information - */ - -static int nfs_set_super(struct super_block *s, void *data) +struct inode *nfs_alloc_inode(struct super_block *sb) { - s->s_fs_info = data; - return set_anon_super(s, data); -} - -static int nfs_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) - return 0; - if (old->addr.sin_port != server->addr.sin_port) - return 0; - return !nfs_compare_fh(&old->fh, &server->fh); + struct nfs_inode *nfsi; + nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); + if (!nfsi) + return NULL; + nfsi->flags = 0UL; + nfsi->cache_validity = 0UL; +#if IS_ENABLED(CONFIG_NFS_V4) + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ + return &nfsi->vfs_inode; } +EXPORT_SYMBOL_GPL(nfs_alloc_inode); -static struct super_block *nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static void nfs_i_callback(struct rcu_head *head) { - int error; - struct nfs_server *server = NULL; - struct super_block *s; - struct nfs_fh *root; - struct nfs_mount_data *data = raw_data; - - s = ERR_PTR(-EINVAL); - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err; - } - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - s = ERR_PTR(-EPROTONOSUPPORT); - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err; - } -#endif /* CONFIG_NFS_V3 */ - - s = ERR_PTR(-ENOMEM); - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - goto out_err; - memset(server, 0, sizeof(struct nfs_server)); - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - s = ERR_PTR(-EINVAL); - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - - /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_err; - } - - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - if (IS_ERR(s) || s->s_root) - goto out_rpciod_down; - - s->s_flags = flags; - - error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_rpciod_down: - rpciod_down(); -out_err: - kfree(server); - return s; + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } -static void nfs_kill_super(struct super_block *s) +void nfs_destroy_inode(struct inode *inode) { - struct nfs_server *server = NFS_SB(s); - - kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - rpciod_down(); /* release rpciod */ - - if (server->hostname != NULL) - kfree(server->hostname); - kfree(server); + call_rcu(&inode->i_rcu, nfs_i_callback); } +EXPORT_SYMBOL_GPL(nfs_destroy_inode); -static struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .get_sb = nfs_get_sb, - .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -#ifdef CONFIG_NFS_V4 - -static void nfs4_clear_inode(struct inode *); - - -static struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, - .statfs = nfs_statfs, - .clear_inode = nfs4_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, -}; - -/* - * Clean out any remaining NFSv4 state that might be left over due - * to open() calls that passed nfs_atomic_lookup, but failed to call - * nfs_open(). - */ -static void nfs4_clear_inode(struct inode *inode) +static inline void nfs4_init_once(struct nfs_inode *nfsi) { - struct nfs_inode *nfsi = NFS_I(inode); - - /* If we are holding a delegation, return it! */ - nfs_inode_return_delegation(inode); - /* First call standard NFS clear_inode() code */ - nfs_clear_inode(inode); - /* Now clear out any remaining state */ - while (!list_empty(&nfsi->open_states)) { - struct nfs4_state *state; - - state = list_entry(nfsi->open_states.next, - struct nfs4_state, - inode_states); - dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", - __FUNCTION__, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - state); - BUG_ON(atomic_read(&state->count) != 1); - nfs4_close_state(state, state->state); - } +#if IS_ENABLED(CONFIG_NFS_V4) + INIT_LIST_HEAD(&nfsi->open_states); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + init_rwsem(&nfsi->rwsem); + nfsi->layout = NULL; +#endif } - -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +static void init_once(void *foo) { - struct nfs_server *server; - struct nfs4_client *clp = NULL; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - server->rpc_ops = &nfs_v4_clientops; - - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - clp = nfs4_get_client(&server->addr.sin_addr); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return -EIO; - } - - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(data->proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); - if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; - clp->cl_rpcclient = clnt; - clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); - if (IS_ERR(clp->cl_cred)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clp->cl_cred); - clp->cl_cred = NULL; - goto out_fail; - } - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); - } - if (list_empty(&clp->cl_superblocks)) { - err = nfs4_init_client(clp); - if (err != 0) { - up_write(&clp->cl_sem); - goto out_fail; - } - } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs4_state = clp; - up_write(&clp->cl_sem); - clp = NULL; - - if (IS_ERR(clnt)) { - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - return err; - } - - server->client = clnt; - - if (server->nfs4_state->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return -ENOMEM; - } - - if (clnt->cl_auth->au_flavor != authflavour) { - struct rpc_auth *auth; - - auth = rpcauth_create(authflavour, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return PTR_ERR(auth); - } - } - - sb->s_time_gran = 1; + struct nfs_inode *nfsi = (struct nfs_inode *) foo; - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - if (err == 0) - return 0; -out_fail: - if (clp) - nfs4_put_client(clp); - return err; + inode_init_once(&nfsi->vfs_inode); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); + INIT_LIST_HEAD(&nfsi->commit_info.list); + nfsi->npages = 0; + nfsi->commit_info.ncommit = 0; + atomic_set(&nfsi->commit_info.rpcs_out, 0); + atomic_set(&nfsi->silly_count, 1); + INIT_HLIST_HEAD(&nfsi->silly_list); + init_waitqueue_head(&nfsi->waitqueue); + nfs4_init_once(nfsi); } -static int nfs4_compare_super(struct super_block *sb, void *data) +static int __init nfs_init_inodecache(void) { - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); + nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", + sizeof(struct nfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once); + if (nfs_inode_cachep == NULL) + return -ENOMEM; - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; + return 0; } -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +static void nfs_destroy_inodecache(void) { - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - if (p != NULL) - kfree(p); - return ERR_PTR(-EFAULT); - } - dst[maxlen] = '\0'; - return dst; + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(nfs_inode_cachep); } -static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - int error; - struct nfs_server *server; - struct super_block *s; - struct nfs4_mount_data *data = raw_data; - void *p; +struct workqueue_struct *nfsiod_workqueue; +EXPORT_SYMBOL_GPL(nfsiod_workqueue); - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return ERR_PTR(-ENOMEM); - memset(server, 0, sizeof(struct nfs_server)); - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - p = nfs_copy_user_string(NULL, &data->hostname, 256); - if (IS_ERR(p)) - goto out_err; - server->hostname = p; - - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); - if (IS_ERR(p)) - goto out_err; - server->mnt_path = p; - - p = nfs_copy_user_string(server->ip_addr, &data->client_addr, - sizeof(server->ip_addr) - 1); - if (IS_ERR(p)) - goto out_err; - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - s = ERR_PTR(-EINVAL); - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - s = ERR_PTR(-EFAULT); - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - s = ERR_PTR(-EINVAL); - goto out_free; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_free; - } - - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) - goto out_free; - - s->s_flags = flags; - - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_err: - s = (struct super_block *)p; -out_free: - if (server->mnt_path) - kfree(server->mnt_path); - if (server->hostname) - kfree(server->hostname); - kfree(server); - return s; -} - -static void nfs4_kill_super(struct super_block *sb) +/* + * start up the nfsiod workqueue + */ +static int nfsiod_start(void) { - struct nfs_server *server = NFS_SB(sb); - - nfs_return_all_delegations(sb); - kill_anon_super(sb); - - nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); - rpciod_down(); /* release rpciod */ - - destroy_nfsv4_state(server); - - if (server->hostname != NULL) - kfree(server->hostname); - kfree(server); + struct workqueue_struct *wq; + dprintk("RPC: creating workqueue nfsiod\n"); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); + if (wq == NULL) + return -ENOMEM; + nfsiod_workqueue = wq; + return 0; } -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .get_sb = nfs4_get_sb, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -#define nfs4_init_once(nfsi) \ - do { \ - INIT_LIST_HEAD(&(nfsi)->open_states); \ - nfsi->delegation = NULL; \ - nfsi->delegation_state = 0; \ - init_rwsem(&nfsi->rwsem); \ - } while(0) -#define register_nfs4fs() register_filesystem(&nfs4_fs_type) -#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) -#else -#define nfs4_init_once(nfsi) \ - do { } while (0) -#define register_nfs4fs() (0) -#define unregister_nfs4fs() -#endif - -extern int nfs_init_nfspagecache(void); -extern void nfs_destroy_nfspagecache(void); -extern int nfs_init_readpagecache(void); -extern void nfs_destroy_readpagecache(void); -extern int nfs_init_writepagecache(void); -extern void nfs_destroy_writepagecache(void); -#ifdef CONFIG_NFS_DIRECTIO -extern int nfs_init_directcache(void); -extern void nfs_destroy_directcache(void); -#endif - -static kmem_cache_t * nfs_inode_cachep; - -static struct inode *nfs_alloc_inode(struct super_block *sb) +/* + * Destroy the nfsiod workqueue + */ +static void nfsiod_stop(void) { - struct nfs_inode *nfsi; - nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); - if (!nfsi) - return NULL; - nfsi->flags = 0UL; - nfsi->cache_validity = 0UL; -#ifdef CONFIG_NFS_V3_ACL - nfsi->acl_access = ERR_PTR(-EAGAIN); - nfsi->acl_default = ERR_PTR(-EAGAIN); -#endif -#ifdef CONFIG_NFS_V4 - nfsi->nfs4_acl = NULL; -#endif /* CONFIG_NFS_V4 */ - return &nfsi->vfs_inode; -} + struct workqueue_struct *wq; -static void nfs_destroy_inode(struct inode *inode) -{ - kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); + wq = nfsiod_workqueue; + if (wq == NULL) + return; + nfsiod_workqueue = NULL; + destroy_workqueue(wq); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) -{ - struct nfs_inode *nfsi = (struct nfs_inode *) foo; +int nfs_net_id; +EXPORT_SYMBOL_GPL(nfs_net_id); - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&nfsi->vfs_inode); - spin_lock_init(&nfsi->req_lock); - INIT_LIST_HEAD(&nfsi->dirty); - INIT_LIST_HEAD(&nfsi->commit); - INIT_LIST_HEAD(&nfsi->open_files); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); - atomic_set(&nfsi->data_updates, 0); - nfsi->ndirty = 0; - nfsi->ncommit = 0; - nfsi->npages = 0; - nfs4_init_once(nfsi); - } -} - -static int nfs_init_inodecache(void) +static int nfs_net_init(struct net *net) { - nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", - sizeof(struct nfs_inode), - 0, SLAB_RECLAIM_ACCOUNT, - init_once, NULL); - if (nfs_inode_cachep == NULL) - return -ENOMEM; - + nfs_clients_init(net); return 0; } -static void nfs_destroy_inodecache(void) +static void nfs_net_exit(struct net *net) { - if (kmem_cache_destroy(nfs_inode_cachep)) - printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n"); + nfs_cleanup_cb_ident_idr(net); } +static struct pernet_operations nfs_net_ops = { + .init = nfs_net_init, + .exit = nfs_net_exit, + .id = &nfs_net_id, + .size = sizeof(struct nfs_net), +}; + /* * Initialize NFS */ @@ -2133,75 +1862,95 @@ static int __init init_nfs_fs(void) { int err; + err = register_pernet_subsys(&nfs_net_ops); + if (err < 0) + goto out9; + + err = nfs_fscache_register(); + if (err < 0) + goto out8; + + err = nfsiod_start(); + if (err) + goto out7; + + err = nfs_fs_proc_init(); + if (err) + goto out6; + err = nfs_init_nfspagecache(); if (err) - goto out4; + goto out5; err = nfs_init_inodecache(); if (err) - goto out3; + goto out4; err = nfs_init_readpagecache(); if (err) - goto out2; + goto out3; err = nfs_init_writepagecache(); if (err) - goto out1; + goto out2; -#ifdef CONFIG_NFS_DIRECTIO err = nfs_init_directcache(); if (err) - goto out0; -#endif + goto out1; #ifdef CONFIG_PROC_FS - rpc_proc_register(&nfs_rpcstat); + rpc_proc_register(&init_net, &nfs_rpcstat); #endif - err = register_filesystem(&nfs_fs_type); - if (err) - goto out; - if ((err = register_nfs4fs()) != 0) - goto out; + if ((err = register_nfs_fs()) != 0) + goto out0; + return 0; -out: +out0: #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif - nfs_destroy_writepagecache(); -#ifdef CONFIG_NFS_DIRECTIO -out0: nfs_destroy_directcache(); -#endif out1: - nfs_destroy_readpagecache(); + nfs_destroy_writepagecache(); out2: - nfs_destroy_inodecache(); + nfs_destroy_readpagecache(); out3: - nfs_destroy_nfspagecache(); + nfs_destroy_inodecache(); out4: + nfs_destroy_nfspagecache(); +out5: + nfs_fs_proc_exit(); +out6: + nfsiod_stop(); +out7: + nfs_fscache_unregister(); +out8: + unregister_pernet_subsys(&nfs_net_ops); +out9: return err; } static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_DIRECTIO nfs_destroy_directcache(); -#endif nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); + nfs_fscache_unregister(); + unregister_pernet_subsys(&nfs_net_ops); #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif - unregister_filesystem(&nfs_fs_type); - unregister_nfs4fs(); + unregister_nfs_fs(); + nfs_fs_proc_exit(); + nfsiod_stop(); } /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); MODULE_LICENSE("GPL"); +module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) module_exit(exit_nfs_fs) |
