diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-04 19:55:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-04 19:55:11 -0700 |
commit | 4d4700707c0d4be0efc968989fb1cd01c60c0a35 (patch) | |
tree | 478453a4ae9453bd8d26ffc3df6eedcc30799a43 | |
parent | 7e20ef030dde0e52dd5a57220ee82fa9facbea4e (diff) | |
parent | 84dde76c4a2d99ed2d7de6ec82c53b56620900a3 (diff) |
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (28 commits)
NFS: Fix a compile glitch on 64-bit systems
NFS: Clean up nfs_create_request comments
spkm3: initialize hash
spkm3: remove bad kfree, unnecessary export
spkm3: fix spkm3's use of hmac
NFS4: invalidate cached acl on setacl
NFS: Fix directory caching problem - with test case and patch.
NFS: Set meaningful value for fattr->time_start in readdirplus results.
NFS: Added support to turn off the NFSv3 READDIRPLUS RPC.
SUNRPC: RPC client should retry with different versions of rpcbind
SUNRPC: remove old portmapper
NFS: switch NFSROOT to use new rpcbind client
SUNRPC: switch the RPC server to use the new rpcbind registration API
SUNRPC: switch socket-based RPC transports to use rpcbind
SUNRPC: introduce rpcbind: replacement for in-kernel portmapper
SUNRPC: Eliminate side effects from rpc_malloc
SUNRPC: RPC buffer size estimates are too large
NLM: Shrink the maximum request size of NLM4 requests
NFS: Use pgoff_t in structures and functions that pass page cache offsets
NFS: Clean up nfs_sync_mapping_wait()
...
38 files changed, 1105 insertions, 882 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index e33c0892457..8ea7b04c661 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1734,6 +1734,18 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_BIND34 + bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + help + Provides kernel support for querying rpcbind servers via versions 3 + and 4 of the rpcbind protocol. The kernel automatically falls back + to version 2 if a remote rpcbind service does not support versions + 3 or 4. + + If unsure, say N to get traditional behavior (version 2 rpcbind + requests only). + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index eb243edf893..2102e2d0134 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -225,16 +225,13 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) #define SM_monres_sz 2 #define SM_unmonres_sz 1 -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - static struct rpc_procinfo nsm_procedures[] = { [SM_MON] = { .p_proc = SM_MON, .p_encode = (kxdrproc_t) xdr_encode_mon, .p_decode = (kxdrproc_t) xdr_decode_stat_res, - .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, + .p_arglen = SM_mon_sz, + .p_replen = SM_monres_sz, .p_statidx = SM_MON, .p_name = "MONITOR", }, @@ -242,7 +239,8 @@ static struct rpc_procinfo nsm_procedures[] = { .p_proc = SM_UNMON, .p_encode = (kxdrproc_t) xdr_encode_unmon, .p_decode = (kxdrproc_t) xdr_decode_stat, - .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, + .p_arglen = SM_mon_id_sz, + .p_replen = SM_unmonres_sz, .p_statidx = SM_UNMON, .p_name = "UNMONITOR", }, diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 34dae5d7073..9702956d206 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -510,17 +510,20 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) return 0; } +#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) +# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!" +#endif + /* * Buffer requirements for NLM */ #define NLM_void_sz 0 #define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) -#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename)) -#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) -/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */ +#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) +#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) #define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) -#define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz -#define NLM_holder_sz 4+NLM_netobj_sz +#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz +#define NLM_holder_sz 4+NLM_owner_sz #define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz #define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz @@ -531,10 +534,6 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) #define NLM_res_sz NLM_cookie_sz+1 #define NLM_norep_sz 0 -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - /* * For NLM, a void procedure really returns nothing */ @@ -545,7 +544,8 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ - .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ + .p_arglen = NLM_##argtype##_sz, \ + .p_replen = NLM_##restype##_sz, \ .p_statidx = NLMPROC_##proc, \ .p_name = #proc, \ } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index a7824055121..ce1efdbe1b3 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -516,17 +516,24 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) return 0; } +#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) +# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!" +#endif + +#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN) +# error "NLM host name cannot be larger than NLM's maximum string length!" +#endif + /* * Buffer requirements for NLM */ #define NLM4_void_sz 0 #define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) -#define NLM4_caller_sz 1+XDR_QUADLEN(NLM_MAXSTRLEN) -#define NLM4_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) -/* #define NLM4_owner_sz 1+XDR_QUADLEN(NLM4_MAXOWNER) */ +#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) +#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE) #define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE) -#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_netobj_sz+NLM4_fhandle_sz -#define NLM4_holder_sz 6+NLM4_netobj_sz +#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz +#define NLM4_holder_sz 6+NLM4_owner_sz #define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz #define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz @@ -537,10 +544,6 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) #define NLM4_res_sz NLM4_cookie_sz+1 #define NLM4_norep_sz 0 -#ifndef MAX -# define MAX(a,b) (((a) > (b))? (a) : (b)) -#endif - /* * For NLM, a void procedure really returns nothing */ @@ -551,7 +554,8 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) .p_proc = NLMPROC_##proc, \ .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ - .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ + .p_arglen = NLM4_##argtype##_sz, \ + .p_replen = NLM4_##restype##_sz, \ .p_statidx = NLMPROC_##proc, \ .p_name = #proc, \ } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2190e6c2792..5bd03b97002 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -618,7 +618,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat if (clp->cl_nfsversion == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - server->caps |= NFS_CAP_READDIRPLUS; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cd3469720cb..e59fd31c9a2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -154,6 +154,8 @@ typedef struct { decode_dirent_t decode; int plus; int error; + unsigned long timestamp; + int timestamp_valid; } nfs_readdir_descriptor_t; /* Now we cache directories properly, by stuffing the dirent @@ -195,6 +197,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) } goto error; } + desc->timestamp = timestamp; + desc->timestamp_valid = 1; SetPageUptodate(page); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; @@ -225,6 +229,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) if (IS_ERR(p)) return PTR_ERR(p); desc->ptr = p; + if (desc->timestamp_valid) + desc->entry->fattr->time_start = desc->timestamp; + else + desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; return 0; } @@ -316,6 +324,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) __FUNCTION__, desc->page_index, (long long) *desc->dir_cookie); + /* If we find the page in the page_cache, we cannot be sure + * how fresh the data is, so we will ignore readdir_plus attributes. + */ + desc->timestamp_valid = 0; page = read_cache_page(inode->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); if (IS_ERR(page)) { @@ -468,6 +480,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; + unsigned long timestamp; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -477,6 +490,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, status = -ENOMEM; goto out; } + timestamp = jiffies; desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, @@ -487,6 +501,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { + desc->timestamp = timestamp; + desc->timestamp_valid = 1; if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = *desc->dir_cookie; } else @@ -849,6 +865,10 @@ static int nfs_dentry_delete(struct dentry *dentry) static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { nfs_inode_return_delegation(inode); + if (S_ISDIR(inode->i_mode)) + /* drop any readdir cache as it could easily be old */ + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); drop_nlink(inode); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2877744cb60..889de60f8a8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -54,6 +54,7 @@ #include <asm/uaccess.h> #include <asm/atomic.h> +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -271,7 +272,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo bytes = min(rsize,count); result = -ENOMEM; - data = nfs_readdata_alloc(pgbase + bytes); + data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); if (unlikely(!data)) break; @@ -602,7 +603,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l bytes = min(wsize,count); result = -ENOMEM; - data = nfs_writedata_alloc(pgbase + bytes); + data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); if (unlikely(!data)) break; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6610f2b0207..ad2b40db1e6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -231,3 +231,15 @@ unsigned int nfs_page_length(struct page *page) } return 0; } + +/* + * Determine the number of pages in an array of length 'len' and + * with a base offset of 'base' + */ +static inline +unsigned int nfs_page_array_len(unsigned int base, size_t len) +{ + return ((unsigned long)len + (unsigned long)base + + PAGE_SIZE - 1) >> PAGE_SHIFT; +} + diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index f75fe72b416..ca5a266a314 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -133,13 +133,15 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) #define MNT_dirpath_sz (1 + 256) #define MNT_fhstatus_sz (1 + 8) +#define MNT_fhstatus3_sz (1 + 16) static struct rpc_procinfo mnt_procedures[] = { [MNTPROC_MNT] = { .p_proc = MNTPROC_MNT, .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_bufsiz = MNT_dirpath_sz << 2, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus_sz, .p_statidx = MNTPROC_MNT, .p_name = "MOUNT", }, @@ -150,7 +152,8 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_proc = MOUNTPROC3_MNT, .p_encode = (kxdrproc_t) xdr_encode_dirpath, .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_bufsiz = MNT_dirpath_sz << 2, + .p_arglen = MNT_dirpath_sz, + .p_replen = MNT_fhstatus3_sz, .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 3be4e72a022..abd9f8b4894 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -687,16 +687,13 @@ nfs_stat_to_errno(int stat) return nfs_errtbl[i].errno; } -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype, timer) \ [NFSPROC_##proc] = { \ .p_proc = NFSPROC_##proc, \ .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ - .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ + .p_arglen = NFS_##argtype##_sz, \ + .p_replen = NFS_##restype##_sz, \ .p_timer = timer, \ .p_statidx = NFSPROC_##proc, \ .p_name = #proc, \ diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 0ace092d126..b51df8eb9f0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1102,16 +1102,13 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) } #endif /* CONFIG_NFS_V3_ACL */ -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype, timer) \ [NFS3PROC_##proc] = { \ .p_proc = NFS3PROC_##proc, \ .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ - .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ + .p_arglen = NFS3_##argtype##_sz, \ + .p_replen = NFS3_##restype##_sz, \ .p_timer = timer, \ .p_statidx = NFS3PROC_##proc, \ .p_name = #proc, \ @@ -1153,7 +1150,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_proc = ACLPROC3_GETACL, .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, - .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_arglen = ACL3_getaclargs_sz, + .p_replen = ACL3_getaclres_sz, .p_timer = 1, .p_name = "GETACL", }, @@ -1161,7 +1159,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { .p_proc = ACLPROC3_SETACL, .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, - .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_arglen = ACL3_setaclargs_sz, + .p_replen = ACL3_setaclres_sz, .p_timer = 0, .p_name = "SETACL", }, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f52cf5c33c6..3b5ca1b15fe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2647,8 +2647,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (ret == 0) - nfs4_write_cached_acl(inode, buf, buflen); + nfs_zap_caches(inode); return ret; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f02d522fd78..b8c28f2380a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4546,16 +4546,13 @@ nfs4_stat_to_errno(int stat) return stat; } -#ifndef MAX -# define MAX(a, b) (((a) > (b))? (a) : (b)) -#endif - #define PROC(proc, argtype, restype) \ [NFSPROC4_CLNT_##proc] = { \ .p_proc = NFSPROC4_COMPOUND, \ .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ - .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ + .p_arglen = NFS4_##argtype##_sz, \ + .p_replen = NFS4_##restype##_sz, \ .p_statidx = NFSPROC4_CLNT_##proc, \ .p_name = #proc, \ } diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 75f819dc025..49d1008ce1d 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto) printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", program, version, NIPQUAD(servaddr)); set_sockaddr(&sin, servaddr, 0); - return rpc_getport_external(&sin, program, version, proto); + return rpcb_getport_external(&sin, program, version, proto); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ca4b1d4ff42..388950118f5 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -17,7 +17,8 @@ #include <linux/nfs_page.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> -#include <linux/writeback.h> + +#include "internal.h" #define NFS_PARANOIA 1 @@ -50,9 +51,7 @@ nfs_page_free(struct nfs_page *p) * @count: number of bytes to read/write * * The page must be locked by the caller. This makes sure we never - * create two different requests for the same page, and avoids - * a possible deadlock when we reach the hard limit on the number - * of dirty pages. + * create two different requests for the same page. * User should ensure it is safe to sleep in this function. */ struct nfs_page * @@ -63,16 +62,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct nfs_server *server = NFS_SERVER(inode); struct nfs_page *req; - /* Deal with hard limits. */ for (;;) { /* try to allocate the request struct */ req = nfs_page_alloc(); if (req != NULL) break; - /* Try to free up at least one request in order to stay - * below the hard limit - */ if (signalled() && (server->flags & NFS_MOUNT_INTR)) return ERR_PTR(-ERESTARTSYS); yield(); @@ -223,124 +218,151 @@ out: } /** - * nfs_coalesce_requests - Split coalesced requests out from a list. - * @head: source list - * @dst: destination list - * @nmax: maximum number of requests to coalesce - * - * Moves a maximum of 'nmax' elements from one list to another. - * The elements are checked to ensure that they form a contiguous set - * of pages, and that the RPC credentials are the same. + * nfs_pageio_init - initialise a page io descriptor + * @desc: pointer to descriptor + * @inode: pointer to inode + * @doio: pointer to io function + * @bsize: io block size + * @io_flags: extra parameters for the io function */ -int -nfs_coalesce_requests(struct list_head *head, struct list_head *dst, - unsigned int nmax) +void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + struct inode *inode, + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), + size_t bsize, + int io_flags) { - struct nfs_page *req = NULL; - unsigned int npages = 0; - - while (!list_empty(head)) { - struct nfs_page *prev = req; - - req = nfs_list_entry(head->next); - if (prev) { - if (req->wb_context->cred != prev->wb_context->cred) - break; - if (req->wb_context->lockowner != prev->wb_context->lockowner) - break; - if (req->wb_context->state != prev->wb_context->state) - break; - if (req->wb_index != (prev->wb_index + 1)) - break; - - if (req->wb_pgbase != 0) - break; - } - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - npages++; - if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE) - break; - if (npages >= nmax) - break; - } - return npages; + INIT_LIST_HEAD(&desc->pg_list); + desc->pg_bytes_written = 0; + desc->pg_count = 0; + desc->pg_bsize = bsize; + desc->pg_base = 0; + desc->pg_inode = inode; + desc->pg_doio = doio; + desc->pg_ioflags = io_flags; + desc->pg_error = 0; } -#define NFS_SCAN_MAXENTRIES 16 /** - * nfs_scan_dirty - Scan the radix tree for dirty requests - * @mapping: pointer to address space - * @wbc: writeback_control structure - * @dst: Destination list + * nfs_can_coalesce_requests - test two requests for compatibility + * @prev: pointer to nfs_page + * @req: pointer to nfs_page * - * Moves elements from one of the inode request lists. - * If the number of requests is set to 0, the entire address_space - * starting at index idx_start, is scanned. - * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's req_lock when calling this function + * The nfs_page structures 'prev' and 'req' are compared to ensure that the + * page data area they describe is contiguous, and that their RPC + * credentials, NFSv4 open state, and lockowners are the same. + * + * Return 'true' if this is the case, else return 'false'. */ -long nfs_scan_dirty(struct address_space *mapping, - struct writeback_control *wbc, - struct list_head *dst) +static int nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(mapping->host); - struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; - struct nfs_page *req; - pgoff_t idx_start, idx_end; - long res = 0; - int found, i; - - if (nfsi->ndirty == 0) + if (req->wb_context->cred != prev->wb_context->cred) return 0; - if (wbc->range_cyclic) { - idx_start = 0; - idx_end = ULONG_MAX; - } else if (wbc->range_end == 0) { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = ULONG_MAX; - } else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - } + if (req->wb_context->lockowner != prev->wb_context->lockowner) + return 0; + if (req->wb_context->state != prev->wb_context->state) + return 0; + if (req->wb_index != (prev->wb_index + 1)) + return 0; + if (req->wb_pgbase != 0) + return 0; + if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return 0; + return 1; +} - for (;;) { - unsigned int toscan = NFS_SCAN_MAXENTRIES; +/** + * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + size_t newlen = req->wb_bytes; - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, toscan, - NFS_PAGE_TAG_DIRTY); + if (desc->pg_count != 0) { + struct nfs_page *prev; - /* Did we make progress? */ - if (found <= 0) - break; + /* + * FIXME: ideally we should be able to coalesce all requests + * that are not block boundary aligned, but currently this + * is problematic for the case of bsize < PAGE_CACHE_SIZE, + * since nfs_flush_multi and nfs_pagein_multi assume you + * can have only one struct nfs_page. + */ + if (desc->pg_bsize < PAGE_SIZE) + return 0; + newlen += desc->pg_count; + if (newlen > desc->pg_bsize) + return 0; + prev = nfs_list_entry(desc->pg_list.prev); + if (!nfs_can_coalesce_requests(prev, req)) + return 0; + } else + desc->pg_base = req->wb_pgbase; + nfs_list_remove_request(req); + nfs_list_add_request(req, &desc->pg_list); + desc->pg_count = newlen; + return 1; +} - for (i = 0; i < found; i++) { - req = pgvec[i]; - if (!wbc->range_cyclic && req->wb_index > idx_end) - goto out; +/* + * Helper for nfs_pageio_add_request and nfs_pageio_complete + */ +static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) +{ + if (!list_empty(&desc->pg_list)) { + int error = desc->pg_doio(desc->pg_inode, + &desc->pg_list, + nfs_page_array_len(desc->pg_base, + desc->pg_count), + desc->pg_count, + desc->pg_ioflags); + if (error < 0) + desc->pg_error = error; + else + desc->pg_bytes_written += desc->pg_count; + } + if (list_empty(&desc->pg_list)) { + desc->pg_count = 0; + desc->pg_base = 0; + } +} - /* Try to lock request and mark it for writeback */ - if (!nfs_set_page_writeback_locked(req)) - goto next; - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfsi->ndirty--; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; - if (res == LONG_MAX) - goto out; -next: - idx_start = req->wb_index + 1; - } +/** + * nfs_pageio_add_request - Attempt to coalesce a request into a page list. + * @desc: destination io descriptor + * @req: request + * + * Returns true if the request 'req' was successfully coalesced into the + * existing list of pages 'desc'. + */ +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + while (!nfs_pageio_do_add_request(desc, req)) { + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; } -out: - WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)); - return res; + return 1; } /** + * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor + * @desc: pointer to io descriptor |