From 1de3fc12ea085690547a54b6efa01c7348f1cebd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:44 -0400 Subject: NFS: Clean up and fix page zeroing when we have short reads The code that is supposed to zero the uninitialised partial pages when the server returns a short read is currently broken: it looks at the nfs_page wb_pgbase and wb_bytes fields instead of the equivalent nfs_read_data values when deciding where to start truncating the page. Also ensure that we are more careful about setting PG_uptodate before retrying a short read: the retry will change the nfs_read_data args.pgbase and args.count. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 107 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 624ca7146b6..4b5f58da565 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -104,6 +104,28 @@ int nfs_return_empty_page(struct page *page) return 0; } +static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) +{ + unsigned int remainder = data->args.count - data->res.count; + unsigned int base = data->args.pgbase + data->res.count; + unsigned int pglen; + struct page **pages; + + if (data->res.eof == 0 || remainder == 0) + return; + /* + * Note: "remainder" can never be negative, since we check for + * this in the XDR code. + */ + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + pglen = PAGE_CACHE_SIZE - base; + if (pglen < remainder) + memclear_highpage_flush(*pages, base, pglen); + else + memclear_highpage_flush(*pages, base, remainder); +} + /* * Read a page synchronously. 
*/ @@ -177,11 +199,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; spin_unlock(&inode->i_lock); - if (count) - memclear_highpage_flush(page, rdata->args.pgbase, count); - SetPageUptodate(page); - if (PageError(page)) - ClearPageError(page); + nfs_readpage_truncate_uninitialised_page(rdata); + if (rdata->res.eof || rdata->res.count == rdata->args.count) + SetPageUptodate(page); result = 0; io_error: @@ -436,20 +456,12 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) struct nfs_page *req = data->req; struct page *page = req->wb_page; + if (likely(task->tk_status >= 0)) + nfs_readpage_truncate_uninitialised_page(data); + else + SetPageError(page); if (nfs_readpage_result(task, data) != 0) return; - if (task->tk_status >= 0) { - unsigned int request = data->args.count; - unsigned int result = data->res.count; - - if (result < request) { - memclear_highpage_flush(page, - data->args.pgbase + result, - request - result); - } - } else - SetPageError(page); - if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); @@ -462,6 +474,40 @@ static const struct rpc_call_ops nfs_read_partial_ops = { .rpc_release = nfs_readdata_release, }; +static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) +{ + unsigned int count = data->res.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + if (unlikely(count == 0)) + return; + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageUptodate(*pages); + /* + * Was this an eof or a short read? If the latter, don't mark the page + * as uptodate yet. 
+ */ + if (count > 0 && (data->res.eof || data->args.count == data->res.count)) + SetPageUptodate(*pages); +} + +static void nfs_readpage_set_pages_error(struct nfs_read_data *data) +{ + unsigned int count = data->args.count; + unsigned int base = data->args.pgbase; + struct page **pages; + + pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; + base &= ~PAGE_CACHE_MASK; + count += base; + for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) + SetPageError(*pages); +} + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -469,27 +515,24 @@ static const struct rpc_call_ops nfs_read_partial_ops = { static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - unsigned int count = data->res.count; + /* + * Note: nfs_readpage_result may change the values of + * data->args. In the multi-page case, we therefore need + * to ensure that we call the next nfs_readpage_set_page_uptodate() + * first in the multi-page case. 
+ */ + if (likely(task->tk_status >= 0)) { + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_set_pages_uptodate(data); + } else + nfs_readpage_set_pages_error(data); if (nfs_readpage_result(task, data) != 0) return; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); - struct page *page = req->wb_page; - nfs_list_remove_request(req); - if (task->tk_status >= 0) { - if (count < PAGE_CACHE_SIZE) { - if (count < req->wb_bytes) - memclear_highpage_flush(page, - req->wb_pgbase + count, - req->wb_bytes - count); - count = 0; - } else - count -= PAGE_CACHE_SIZE; - SetPageUptodate(page); - } else - SetPageError(page); + nfs_list_remove_request(req); nfs_readpage_release(req); } } -- cgit v1.2.3-18-g5258 From 9d1e9232223a7f065be7f956a7b749a4cbbbe16d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:46 -0400 Subject: NFSv4: Some NFSv4 servers have broken behaviour for the change attribute The Linux NFSv4 server violates RFC3530 in that the change attribute is not guaranteed to be updated for every change to the inode. Our optimisation for checking whether or not the inode metadata has changed is broken too. Grr.... 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d0b991a9232..e870e4aae71 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1414,9 +1414,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) { - if (nfsi->change_attr == fattr->change_attr) - goto out; + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; @@ -1444,7 +1443,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (inode->i_nlink != fattr->nlink) nfsi->cache_validity |= NFS_INO_INVALID_ATTR; -out: if (!timespec_equal(&inode->i_atime, &fattr->atime)) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; @@ -1612,15 +1610,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blksize = fattr->du.nfs2.blocksize; } - if ((fattr->valid & NFS_ATTR_FATTR_V4)) { - if (nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - nfsi->change_attr = fattr->change_attr; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = jiffies; - } else - invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA); + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + 
nfsi->cache_change_attribute = jiffies; } /* Update attrtimeo value if we're out of the unstable period */ -- cgit v1.2.3-18-g5258 From 73a3d07c1082145a3b78407bb5252df290470c4c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:47 -0400 Subject: NFS: Clean up inode metadata updates Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ fs/nfs/nfs4proc.c | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e870e4aae71..4f12c57456f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1360,12 +1360,6 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 - && nfsi->change_attr == fattr->pre_change_attr) { - nfsi->change_attr = fattr->change_attr; - nfsi->cache_change_attribute = jiffies; - } - /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { @@ -1399,9 +1393,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat int data_unstable; - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - /* Has the inode gone and changed behind our back? 
*/ if (nfsi->fileid != fattr->fileid || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { @@ -1525,9 +1516,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - if (nfsi->fileid != fattr->fileid) goto out_fileid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d86c0db7b1e..e38a8487449 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2008,7 +2008,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * if (!status) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, res.dir_attr); - nfs_refresh_inode(inode, res.fattr); + nfs_post_op_update_inode(inode, res.fattr); } return status; -- cgit v1.2.3-18-g5258 From 0d0b5cb36faf7002a11736032313f06d6f3d881c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 May 2006 01:40:53 -0400 Subject: NFS: Optimize allocation of nfs_read/write_data structures Clean up use of page_array, and fix an off-by-one error noticed by Tom Talpey which causes kmalloc calls in cases where using the page_array is sufficient. Test plan: Normal client functional testing with r/wsize=32768. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 11 ++++------- fs/nfs/write.c | 18 +++++++----------- 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4b5f58da565..fd9018c692b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,14 +51,11 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_rdata_mempool); p = NULL; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4cfada2cc09..a515ec714bb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -98,11 +98,10 @@ struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kzalloc(size, GFP_NOFS); + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); if (!p->pagevec) { mempool_free(p, nfs_commit_mempool); p = NULL; @@ -126,14 +125,11 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - 
memset(p->pagevec, 0, size); - } else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!p->pagevec) { mempool_free(p, nfs_wdata_mempool); p = NULL; } -- cgit v1.2.3-18-g5258 From f1bb0b92ba2cdfffe6e437f7a7da53138cf08d52 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:55 -0400 Subject: NFS: Fix page cache revalidation Fix up a bug in the handling of NFS_INO_REVAL_PAGECACHE: make sure that nfs_update_inode() clears it when we're sure we're not racing with other updates. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4f12c57456f..eddd0e982d2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1406,18 +1406,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat nfs_wcc_update_inode(inode, fattr); if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - nfsi->change_attr != fattr->change_attr) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + nfsi->change_attr != fattr->change_attr) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; - if (!data_unstable) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; - } + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); @@ -1459,7 +1453,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; if (time_after(fattr->time_start, nfsi->last_updated)) status = 
nfs_update_inode(inode, fattr); else @@ -1484,7 +1477,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; goto out; } status = nfs_update_inode(inode, fattr); @@ -1534,7 +1527,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Are we racing with known updates of the metadata on the server? */ data_stable = nfs_verify_change_attribute(inode, fattr->time_start); if (data_stable) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); -- cgit v1.2.3-18-g5258 From 38478b24e37587f1c4fedf8ac070ca54f052ed28 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:57 -0400 Subject: NFS: More page cache revalidation fixups Whenever the directory changes, we want to make sure that we always invalidate its page cache. Fix up update_changeattr() and nfs_mark_for_revalidate() so that they do so. 
Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e38a8487449..ef4c6cccf95 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -185,15 +185,15 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp spin_unlock(&clp->cl_lock); } -static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinfo) +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { - struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_inode *nfsi = NFS_I(dir); - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + spin_lock(&dir->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; if (cinfo->before == nfsi->change_attr && cinfo->atomic) nfsi->change_attr = cinfo->after; - spin_unlock(&inode->i_lock); + spin_unlock(&dir->i_lock); } struct nfs4_opendata { -- cgit v1.2.3-18-g5258 From 44b11874ff583b6e766a05856b04f3c492c32b84 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:40:59 -0400 Subject: NFS: Separate metadata and page cache revalidation mechanisms Separate out the function of revalidating the inode metadata, and revalidating the mapping. The former may be called by lookup(), and only really needs to check that permissions, ctime, etc. haven't changed, whereas the latter need only be done when we want to read data from the page cache, and may need to sync and then invalidate the mapping. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/file.c | 24 +++--------------------- fs/nfs/inode.c | 16 +++++++++++----- fs/nfs/symlink.c | 2 +- 4 files changed, 16 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cae74dd4c7f..1d3d8922a66 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -528,7 +528,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) lock_kernel(); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) { unlock_kernel(); return res; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index fade02c15e6..63154070145 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -126,23 +126,6 @@ nfs_file_release(struct inode *inode, struct file *filp) return NFS_PROTO(inode)->file_release(inode, filp); } -/** - * nfs_revalidate_file - Revalidate the page cache & related metadata - * @inode - pointer to inode struct - * @file - pointer to file - */ -static int nfs_revalidate_file(struct inode *inode, struct file *filp) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int retval = 0; - - if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)) - || nfs_attribute_timeout(inode)) - retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - nfs_revalidate_mapping(inode, filp->f_mapping); - return 0; -} - /** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct @@ -228,7 +211,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_file(inode, iocb->ki_filp); + result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); @@ -247,7 +230,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, 
dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_file(inode, filp); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -263,7 +246,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_file(inode, file); + status = nfs_revalidate_mapping(inode, file->f_mapping); if (!status) status = generic_file_mmap(file, vma); return status; @@ -373,7 +356,6 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t if (result) goto out; } - nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index eddd0e982d2..69036ef3986 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1220,7 +1220,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = -ESTALE; /* Do we trust the cached ESTALE? */ if (NFS_ATTRTIMEO(inode) != 0) { - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) { /* no */ } else goto out; @@ -1251,8 +1251,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } spin_unlock(&inode->i_lock); - nfs_revalidate_mapping(inode, inode->i_mapping); - if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); @@ -1287,7 +1285,7 @@ int nfs_attribute_timeout(struct inode *inode) int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? 
-ESTALE : 0; return __nfs_revalidate_inode(server, inode); @@ -1298,9 +1296,16 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) * @inode - pointer to host inode * @mapping - pointer to mapping */ -void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + int ret = 0; + + if (NFS_STALE(inode)) + ret = -ESTALE; + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode)) + ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); @@ -1321,6 +1326,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); } + return ret; } /** diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 18dc95b0b64..636c479995b 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -52,7 +52,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); + void *err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, -- cgit v1.2.3-18-g5258 From 1842bfb447cea8b344fd91af97fb6d604ecb11fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 25 May 2006 01:41:01 -0400 Subject: NFS: Fix up inode revalidation accounting Currently, we are accounting for all calls to nfs_revalidate_inode(), but not to nfs_revalidate_mapping(), or nfs_lookup_verify_inode(), etc... 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 69036ef3986..3200358195b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1207,6 +1207,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); lock_kernel(); if (!inode || is_bad_inode(inode)) goto out_nowait; @@ -1284,7 +1285,6 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; -- cgit v1.2.3-18-g5258 From 4814f56d19137b3b9fa8e00e1d332b3683b950de Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 25 May 2006 01:41:03 -0400 Subject: NFSv3: Client-side nfsacl caching fix Fix two errors in the client-side acl cache: First, when nfs3_proc_getacl requests only the default acl of a file and the access acl is not cached already, a NULL access acl entry is cached instead of ERR_PTR(-EAGAIN) ("not cached"). Second, update the cached acls in nfs3_proc_setacls: nfs_refresh_inode does not always invalidate the cached acls, and when it does not, the cached acls get out of sync. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 33287879bd2..7322da4d205 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -172,8 +172,10 @@ static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, inode->i_ino, acl, dfacl); spin_lock(&inode->i_lock); __nfs3_forget_cached_acls(NFS_I(inode)); - nfsi->acl_access = posix_acl_dup(acl); - nfsi->acl_default = posix_acl_dup(dfacl); + if (!IS_ERR(acl)) + nfsi->acl_access = posix_acl_dup(acl); + if (!IS_ERR(dfacl)) + nfsi->acl_default = posix_acl_dup(dfacl); spin_unlock(&inode->i_lock); } @@ -254,7 +256,9 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) res.acl_access = NULL; } } - nfs3_cache_acls(inode, res.acl_access, res.acl_default); + nfs3_cache_acls(inode, + (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL), + (res.mask & NFS_DFACL) ? 
res.acl_default : ERR_PTR(-EINVAL)); switch(type) { case ACL_TYPE_ACCESS: @@ -329,6 +333,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, switch (status) { case 0: status = nfs_refresh_inode(inode, &fattr); + nfs3_cache_acls(inode, acl, dfacl); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: -- cgit v1.2.3-18-g5258 From 3873bc50e2271504da45799257f69222774d9550 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 27 May 2006 03:31:12 +0400 Subject: NFSv4: really return status from decode_recall_args() Signed-off-by: Alexey Dobriyan Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 05c38cf40b6..c92991328d9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -202,7 +202,7 @@ static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xd status = decode_fh(xdr, &args->fh); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, status); - return 0; + return status; } static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) -- cgit v1.2.3-18-g5258 From c04871e6345e4c6dfda564e302d7fd8c66420fd5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 30 May 2006 16:28:58 -0400 Subject: NFSv4: remove obviously bogus comparison from decode_getacl We just set *acl_len to zero, and attrlen is unsigned, so this comparison is clearly bogus. I have no idea what I was thinking. Fixes a bug that caused getacl to fail over krb5p. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7c5d70efe72..7e9a840057f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3350,8 +3350,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, attrlen, recvd); return -EINVAL; } - if (attrlen <= *acl_len) - xdr_read_pages(xdr, attrlen); + xdr_read_pages(xdr, attrlen); *acl_len = attrlen; } else status = -EOPNOTSUPP; -- cgit v1.2.3-18-g5258 From d2ccddf042c403b146159beea438c6bfc4a445e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 31 May 2006 01:13:38 -0400 Subject: NFS: Flesh out nfs_invalidate_page() In the case of a call to truncate_inode_pages(), we should really try to cancel any pending writes on the page. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 6 +++++- fs/nfs/pagelist.c | 47 ++++++++++++++++++++++++++++------------------- fs/nfs/write.c | 27 ++++++++++++++++++++++++--- 3 files changed, 57 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 63154070145..106ef0dec04 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -303,7 +303,11 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse static void nfs_invalidate_page(struct page *page, unsigned long offset) { - /* FIXME: we really should cancel any unstarted writes on this page */ + struct inode *inode = page->mapping->host; + + /* Cancel any unstarted writes on this page */ + if (offset == 0) + nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE); } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 106aca388eb..656481c0daa 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -325,6 +325,7 @@ out: /** * nfs_scan_list - Scan a list for matching requests + * @nfsi: NFS inode * @head: One of the NFS inode request lists * 
@dst: Destination list * @idx_start: lower bound of page->index to scan @@ -336,14 +337,15 @@ out: * The requests are *not* checked to ensure that they form a contiguous set. * You must be holding the inode's req_lock when calling this function */ -int -nfs_scan_list(struct list_head *head, struct list_head *dst, - unsigned long idx_start, unsigned int npages) +int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, + struct list_head *dst, unsigned long idx_start, + unsigned int npages) { - struct list_head *pos, *tmp; - struct nfs_page *req; - unsigned long idx_end; - int res; + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; res = 0; if (npages == 0) @@ -351,21 +353,28 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, else idx_end = idx_start + npages - 1; - list_for_each_safe(pos, tmp, head) { - - req = nfs_list_entry(pos); - - if (req->wb_index < idx_start) - continue; - if (req->wb_index > idx_end) + for (;;) { + found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, + NFS_SCAN_MAXENTRIES); + if (found <= 0) break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + idx_start = req->wb_index + 1; + if (req->wb_list_head != head) + continue; + if (nfs_set_page_writeback_locked(req)) { + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } - if (!nfs_set_page_writeback_locked(req)) - continue; - nfs_list_remove_request(req); - nfs_list_add_request(req, dst); - res++; } +out: return res; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a515ec714bb..e03abbd8302 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -579,6 +579,17 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un return ret; } +static void nfs_cancel_requests(struct list_head *head) +{ + struct nfs_page *req; + while(!list_empty(head)) { + req = 
nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_inode_remove_request(req); + nfs_clear_page_writeback(req); + } +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -623,7 +634,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st int res = 0; if (nfsi->ncommit != 0) { - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); nfsi->ncommit -= res; if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); @@ -1491,15 +1502,25 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, pages = nfs_scan_dirty(inode, &head, idx_start, npages); if (pages != 0) { spin_unlock(&nfsi->req_lock); - ret = nfs_flush_list(inode, &head, pages, how); + if (how & FLUSH_INVALIDATE) + nfs_cancel_requests(&head); + else + ret = nfs_flush_list(inode, &head, pages, how); spin_lock(&nfsi->req_lock); continue; } if (nocommit) break; - pages = nfs_scan_commit(inode, &head, 0, 0); + pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; + if (how & FLUSH_INVALIDATE) { + spin_unlock(&nfsi->req_lock); + nfs_cancel_requests(&head); + spin_lock(&nfsi->req_lock); + continue; + } + pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); ret = nfs_commit_list(inode, &head, how); spin_lock(&nfsi->req_lock); -- cgit v1.2.3-18-g5258 From da6d503aa0a75ec44f17d985a2b500077e7f6a74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 1 Jun 2006 17:26:35 -0400 Subject: NFS: Remove nfs_delete_inode() Now that we have a real nfs_invalidate_page() to ensure that truncate_inode_pages() does the right thing when there are pending dirty pages, we can get rid of nfs_delete_inode(). 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3200358195b..9ff039f9a83 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -62,7 +62,6 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); static int nfs_write_inode(struct inode *,int); -static void nfs_delete_inode(struct inode *); static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); @@ -76,7 +75,6 @@ static struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -146,31 +144,16 @@ nfs_write_inode(struct inode *inode, int sync) return 0; } -static void -nfs_delete_inode(struct inode * inode) -{ - dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); - - truncate_inode_pages(&inode->i_data, 0); - - nfs_wb_all(inode); - /* - * The following should never happen... - */ - if (nfs_have_writebacks(inode)) { - printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); - } - - clear_inode(inode); -} - static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct rpc_cred *cred; - nfs_wb_all(inode); + /* + * The following should never happen... 
+ */ + BUG_ON(nfs_have_writebacks(inode)); BUG_ON (!list_empty(&nfsi->open_files)); nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; @@ -1821,7 +1804,6 @@ static struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, - .delete_inode = nfs_delete_inode, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, -- cgit v1.2.3-18-g5258 From bb4a58bf46473e3e83d84054bbc110db3a0f85e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:15 -0400 Subject: VFS: Add GPL_EXPORTED function vfs_kern_mount() do_kern_mount() does not allow the kernel to use private mount interfaces without exposing the same interfaces to userland. The problem is that the filesystem is referenced by name, thus meaning that it and its mount interface must be registered in the global filesystem list. vfs_kern_mount() passes the struct file_system_type as an explicit parameter in order to overcome this limitation. 
Signed-off-by: Trond Myklebust --- fs/super.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index a66f66bb804..848be4fc67a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -800,17 +800,13 @@ struct super_block *get_sb_single(struct file_system_type *fs_type, EXPORT_SYMBOL(get_sb_single); struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { - struct file_system_type *type = get_fs_type(fstype); struct super_block *sb = ERR_PTR(-ENOMEM); struct vfsmount *mnt; int error; char *secdata = NULL; - if (!type) - return ERR_PTR(-ENODEV); - mnt = alloc_vfsmnt(name); if (!mnt) goto out; @@ -841,7 +837,6 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) mnt->mnt_parent = mnt; up_write(&sb->s_umount); free_secdata(secdata); - put_filesystem(type); return mnt; out_sb: up_write(&sb->s_umount); @@ -852,10 +847,23 @@ out_free_secdata: out_mnt: free_vfsmnt(mnt); out: - put_filesystem(type); return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + put_filesystem(type); + return mnt; +} + EXPORT_SYMBOL_GPL(do_kern_mount); struct vfsmount *kern_mount(struct file_system_type *type) -- cgit v1.2.3-18-g5258 From 1f5ce9e93aa96a867f195ed45f6f77935175f12e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:16 -0400 Subject: VFS: Unexport do_kern_mount() and clean up simple_pin_fs() Replace all module uses with the new vfs_kern_mount() interface, and fix up simple_pin_fs(). 
Signed-off-by: Trond Myklebust --- fs/afs/mntpt.c | 2 +- fs/afs/super.c | 2 +- fs/afs/super.h | 2 ++ fs/binfmt_misc.c | 3 ++- fs/configfs/mount.c | 2 +- fs/debugfs/inode.c | 2 +- fs/libfs.c | 4 ++-- fs/super.c | 4 +--- 8 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 4e6eeb59b83..7b6dc03caf4 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -210,7 +210,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* try and do the mount */ kdebug("--- attempting mount %s -o %s ---", devname, options); - mnt = do_kern_mount("afs", 0, devname, options); + mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); kdebug("--- mount result %p ---", mnt); free_page((unsigned long) devname); diff --git a/fs/afs/super.c b/fs/afs/super.c index 53c56e7231a..93a7821db0d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,7 +48,7 @@ static void afs_put_super(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); -static struct file_system_type afs_fs_type = { +struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .get_sb = afs_get_sb, diff --git a/fs/afs/super.h b/fs/afs/super.h index ac11362f4e9..32de8cc6fae 100644 --- a/fs/afs/super.h +++ b/fs/afs/super.h @@ -38,6 +38,8 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) return sb->s_fs_info; } +extern struct file_system_type afs_fs_type; + #endif /* __KERNEL__ */ #endif /* _LINUX_AFS_SUPER_H */ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index d73d75591a3..c0a909e1d29 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -55,6 +55,7 @@ typedef struct { } Node; static DEFINE_RWLOCK(entries_lock); +static struct file_system_type bm_fs_type; static struct vfsmount *bm_mnt; static int entry_count; @@ -638,7 +639,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, if (!inode) goto out2; - err = simple_pin_fs("binfmt_misc", &bm_mnt, 
&entry_count); + err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count); if (err) { iput(inode); inode = NULL; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index f920d30478e..be5d86ae56f 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -118,7 +118,7 @@ static struct file_system_type configfs_fs_type = { int configfs_pin_fs(void) { - return simple_pin_fs("configfs", &configfs_mount, + return simple_pin_fs(&configfs_fs_type, &configfs_mount, &configfs_mnt_count); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b55b4ea9a67..90f9417181f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -199,7 +199,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, pr_debug("debugfs: creating file '%s'\n",name); - error = simple_pin_fs("debugfs", &debugfs_mount, &debugfs_mount_count); + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) goto exit; diff --git a/fs/libfs.c b/fs/libfs.c index 7145ba7a48d..4a3ec9ad8be 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -424,13 +424,13 @@ out: static DEFINE_SPINLOCK(pin_fs_lock); -int simple_pin_fs(char *name, struct vfsmount **mount, int *count) +int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { struct vfsmount *mnt = NULL; spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = do_kern_mount(name, 0, name, NULL); + mnt = vfs_kern_mount(type, 0, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/super.c b/fs/super.c index 848be4fc67a..15f2afdbf82 100644 --- a/fs/super.c +++ b/fs/super.c @@ -864,11 +864,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) return mnt; } -EXPORT_SYMBOL_GPL(do_kern_mount); - struct vfsmount *kern_mount(struct file_system_type *type) { - return do_kern_mount(type->name, 0, type->name, NULL); + return vfs_kern_mount(type, 0, type->name, NULL); } EXPORT_SYMBOL(kern_mount); -- cgit 
v1.2.3-18-g5258 From 5528f911b4c43a5de5da34bcbd7e3f2a62503617 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:17 -0400 Subject: VFS: Add shrink_submounts() Allow a submount to be marked as being 'shrinkable' by means of the vfsmount->mnt_flags, and then add a function 'shrink_submounts()' which attempts to recursively unmount these submounts. Signed-off-by: Trond Myklebust --- fs/namespace.c | 124 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index bf478addb85..b22e469ab56 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1162,6 +1162,40 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts, } } +/* + * go through the vfsmounts we've just consigned to the graveyard to + * - check that they're still dead + * - delete the vfsmount from the appropriate namespace under lock + * - dispose of the corpse + */ +static void expire_mount_list(struct list_head *graveyard, struct list_head *mounts) +{ + struct namespace *namespace; + struct vfsmount *mnt; + + while (!list_empty(graveyard)) { + LIST_HEAD(umounts); + mnt = list_entry(graveyard->next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); + + /* don't do anything if the namespace is dead - all the + * vfsmounts from it are going away anyway */ + namespace = mnt->mnt_namespace; + if (!namespace || !namespace->root) + continue; + get_namespace(namespace); + + spin_unlock(&vfsmount_lock); + down_write(&namespace_sem); + expire_mount(mnt, mounts, &umounts); + up_write(&namespace_sem); + release_mounts(&umounts); + mntput(mnt); + put_namespace(namespace); + spin_lock(&vfsmount_lock); + } +} + /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came @@ -1169,7 +1203,6 @@ static void expire_mount(struct vfsmount *mnt, struct list_head 
*mounts, */ void mark_mounts_for_expiry(struct list_head *mounts) { - struct namespace *namespace; struct vfsmount *mnt, *next; LIST_HEAD(graveyard); @@ -1193,38 +1226,79 @@ void mark_mounts_for_expiry(struct list_head *mounts) list_move(&mnt->mnt_expire, &graveyard); } - /* - * go through the vfsmounts we've just consigned to the graveyard to - * - check that they're still dead - * - delete the vfsmount from the appropriate namespace under lock - * - dispose of the corpse - */ - while (!list_empty(&graveyard)) { - LIST_HEAD(umounts); - mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); - list_del_init(&mnt->mnt_expire); + expire_mount_list(&graveyard, mounts); - /* don't do anything if the namespace is dead - all the - * vfsmounts from it are going away anyway */ - namespace = mnt->mnt_namespace; - if (!namespace || !namespace->root) + spin_unlock(&vfsmount_lock); +} + +EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); + +/* + * Ripoff of 'select_parent()' + * + * search the list of submounts for a given mountpoint, and move any + * shrinkable submounts to the 'graveyard' list. + */ +static int select_submounts(struct vfsmount *parent, struct list_head *graveyard) +{ + struct vfsmount *this_parent = parent; + struct list_head *next; + int found = 0; + +repeat: + next = this_parent->mnt_mounts.next; +resume: + while (next != &this_parent->mnt_mounts) { + struct list_head *tmp = next; + struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child); + + next = tmp->next; + if (!(mnt->mnt_flags & MNT_SHRINKABLE)) continue; - get_namespace(namespace); + /* + * Descend a level if the d_mounts list is non-empty. 
+ */ + if (!list_empty(&mnt->mnt_mounts)) { + this_parent = mnt; + goto repeat; + } - spin_unlock(&vfsmount_lock); - down_write(&namespace_sem); - expire_mount(mnt, mounts, &umounts); - up_write(&namespace_sem); - release_mounts(&umounts); - mntput(mnt); - put_namespace(namespace); - spin_lock(&vfsmount_lock); + if (!propagate_mount_busy(mnt, 1)) { + mntget(mnt); + list_move_tail(&mnt->mnt_expire, graveyard); + found++; + } } + /* + * All done at this level ... ascend and resume the search + */ + if (this_parent != parent) { + next = this_parent->mnt_child.next; + this_parent = this_parent->mnt_parent; + goto resume; + } + return found; +} + +/* + * process a list of expirable mountpoints with the intent of discarding any + * submounts of a specific parent mountpoint + */ +void shrink_submounts(struct vfsmount *mountpoint, struct list_head *mounts) +{ + LIST_HEAD(graveyard); + int found; + + spin_lock(&vfsmount_lock); + + /* extract submounts of 'mountpoint' from the expiration list */ + while ((found = select_submounts(mountpoint, &graveyard)) != 0) + expire_mount_list(&graveyard, mounts); spin_unlock(&vfsmount_lock); } -EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); +EXPORT_SYMBOL_GPL(shrink_submounts); /* * Some copy_from_user() implementations do not return the exact number of -- cgit v1.2.3-18-g5258 From 8b512d9a88875affe584bb3d2a7a235f84343b9e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:18 -0400 Subject: VFS: Remove dependency of ->umount_begin() call on MNT_FORCE Allow filesystems to decide to perform pre-umount processing whether or not MNT_FORCE is set. 
Signed-off-by: Trond Myklebust --- fs/9p/vfs_super.c | 7 ++++--- fs/cifs/cifsfs.c | 6 ++++-- fs/fuse/inode.c | 5 +++-- fs/namespace.c | 4 ++-- fs/nfs/inode.c | 14 +++++++++----- 5 files changed, 22 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 61c599b4a1e..00c1f6baf87 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -253,11 +253,12 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) } static void -v9fs_umount_begin(struct super_block *sb) +v9fs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct v9fs_session_info *v9ses = sb->s_fs_info; + struct v9fs_session_info *v9ses = vfsmnt->mnt_sb->s_fs_info; - v9fs_session_cancel(v9ses); + if (flags & MNT_FORCE) + v9fs_session_cancel(v9ses); } static struct super_operations v9fs_super_ops = { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c262d8874ce..3fdc2258f44 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -402,12 +402,14 @@ static struct quotactl_ops cifs_quotactl_ops = { #endif #ifdef CONFIG_CIFS_EXPERIMENTAL -static void cifs_umount_begin(struct super_block * sblock) +static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) { struct cifs_sb_info *cifs_sb; struct cifsTconInfo * tcon; - cifs_sb = CIFS_SB(sblock); + if (!(flags & MNT_FORCE)) + return; + cifs_sb = CIFS_SB(vfsmnt->mnt_sb); if(cifs_sb == NULL) return; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7627022446b..13ebe5780c9 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -195,9 +195,10 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, return inode; } -static void fuse_umount_begin(struct super_block *sb) +static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) { - fuse_abort_conn(get_fuse_conn_super(sb)); + if (flags & MNT_FORCE) + fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb)); } static void fuse_put_super(struct super_block *sb) diff --git a/fs/namespace.c b/fs/namespace.c index 
b22e469ab56..6bb0b85293e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -576,8 +576,8 @@ static int do_umount(struct vfsmount *mnt, int flags) */ lock_kernel(); - if ((flags & MNT_FORCE) && sb->s_op->umount_begin) - sb->s_op->umount_begin(sb); + if (sb->s_op->umount_begin) + sb->s_op->umount_begin(mnt, flags); unlock_kernel(); /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9ff039f9a83..fda2b496617 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -63,7 +63,7 @@ static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); static int nfs_write_inode(struct inode *,int); static void nfs_clear_inode(struct inode *); -static void nfs_umount_begin(struct super_block *); +static void nfs_umount_begin(struct vfsmount *, int); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); static int nfs_show_stats(struct seq_file *, struct vfsmount *); @@ -162,15 +162,19 @@ nfs_clear_inode(struct inode *inode) BUG_ON(atomic_read(&nfsi->data_updates) != 0); } -void -nfs_umount_begin(struct super_block *sb) +static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) { - struct rpc_clnt *rpc = NFS_SB(sb)->client; + struct nfs_server *server; + struct rpc_clnt *rpc; + if (!(flags & MNT_FORCE)) + return; /* -EIO all pending I/O */ + server = NFS_SB(vfsmnt->mnt_sb); + rpc = server->client; if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); - rpc = NFS_SB(sb)->client_acl; + rpc = server->client_acl; if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } -- cgit v1.2.3-18-g5258 From 8b4bdcf8995dd92b23d2ec22b32aee8fbbb50e1c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:19 -0400 Subject: NFS: Store the file system "fsid" value in the NFS super block. This should enable us to detect if we are crossing a mountpoint in the case where the server is exporting "nohide" mounts. 
Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 1 - fs/nfs/inode.c | 8 ++++++++ fs/nfs/nfs2xdr.c | 3 ++- fs/nfs/nfs3xdr.c | 3 ++- fs/nfs/nfs4xdr.c | 4 ++-- 5 files changed, 14 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 3fab5b0cfc5..b81e7ed3c90 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -47,7 +47,6 @@ #include #include -#include #include #include diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fda2b496617..1a809f6f898 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -236,6 +236,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f return ERR_PTR(error); } + server->fsid = fsinfo->fattr->fsid; return nfs_fhget(sb, rootfh, fsinfo->fattr); } @@ -1493,6 +1494,7 @@ out: */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; unsigned int invalid = 0; @@ -1511,6 +1513,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; + server = NFS_SERVER(inode); + /* Update the fsid if and only if this is the root directory */ + if (inode == inode->i_sb->s_root->d_inode + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + /* * Update the read time so we don't revalidate too often. 
*/ diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index f0015fa876e..a7ed88f97a1 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -131,7 +131,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) fattr->du.nfs2.blocksize = ntohl(*p++); rdev = ntohl(*p++); fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; fattr->fileid = ntohl(*p++); p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ec233619687..f70eee2cac0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -166,7 +166,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) fattr->rdev = 0; - p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; p = xdr_decode_hyper(p, &fattr->fileid); p = xdr_decode_time3(p, &fattr->atime); p = xdr_decode_time3(p, &fattr->mtime); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7e9a840057f..0d579467594 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2217,7 +2217,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } -static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid) +static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) { uint32_t *p; @@ -2863,7 +2863,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) goto xdr_error; - if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) + if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) goto xdr_error; if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; -- cgit v1.2.3-18-g5258 From 55a975937d40cac582e981ddc8ed783b3dcc043c Mon 
Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:19 -0400 Subject: NFS: Ensure the client submounts, when it crosses a server mountpoint. Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 3 +- fs/nfs/dir.c | 16 +++ fs/nfs/inode.c | 303 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/namespace.c | 89 ++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 2 +- 6 files changed, 409 insertions(+), 5 deletions(-) create mode 100644 fs/nfs/namespace.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ec61fd56a1a..d9d494cee38 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o + proc.o read.o symlink.o unlink.o write.o \ + namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1d3d8922a66..3ddda6f7ecc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -868,6 +868,17 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) return (nd->intent.open.flags & O_EXCL) != 0; } +static inline int nfs_reval_fsid(struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid on root dir */ + return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -900,6 +911,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } + error = nfs_reval_fsid(dir, &fhandle, &fattr); + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; + } inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); res = (struct 
dentry *)inode; if (IS_ERR(res)) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1a809f6f898..47167ab64f5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -221,6 +221,14 @@ nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) return nfs_block_bits(bsize, nrbitsp); } +static inline void +nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + /* * Obtain the root inode of the file system. */ @@ -331,9 +339,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; @@ -877,6 +883,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); + /* Deal with crossing mountpoints */ + if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -1650,6 +1661,141 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * File system information */ +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. 
+ * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +static char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, + struct super_block *(*clone_client)(struct nfs_server *, struct nfs_clone_mount *)) +{ + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + void *err = ERR_PTR(-ENOMEM); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + len = strlen(parent->hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, parent->hostname, len); + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + if (rpciod_up() != 0) + goto free_hostname; + + sb = clone_client(server, data); + if (IS_ERR((err = sb)) || sb->s_root) + goto kill_rpciod; + + sb->s_op = 
data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + err = ERR_PTR(-ENOMEM); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto out_deactivate; + + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out_deactivate; + if (!IS_ERR(parent->client_sys)) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out_deactivate; + } + if (!IS_ERR(parent->client_acl)) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out_deactivate; + } + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out_deactivate; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return sb; +out_put_root: + iput(root_inode); +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return (struct super_block *)err; +kill_rpciod: + rpciod_down(); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return (struct super_block *)err; +} + static int nfs_set_super(struct super_block *s, void *data) { s->s_fs_info = data; @@ -1807,6 +1953,31 @@ static struct file_system_type nfs_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct super_block *nfs_clone_client(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb; + + sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) + 
lockd_up(); + return sb; +} + +static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs_clone_client); +} + +static struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + #ifdef CONFIG_NFS_V4 static void nfs4_clear_inode(struct inode *); @@ -2156,6 +2327,75 @@ static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, &nfs_idmap_cache_timeout, 0644); +/* Constructs the SERVER-side path */ +static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +} + +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_client(struct nfs_server *server, struct nfs_clone_mount *data) +{ + const struct dentry *dentry = data->dentry; + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + 
atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_clone_client); +} + +static struct file_system_type clone_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_clone_nfs4_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -2183,12 +2423,69 @@ static inline void unregister_nfs4fs(void) nfs_unregister_sysctl(); } #else +#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL) #define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() #endif +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) 
+ goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + mnt = (struct vfsmount *)devname; + if (IS_ERR(devname)) + goto free_page; + switch (NFS_SB(mnt_parent->mnt_sb)->rpc_ops->version) { + case 2: + case 3: + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + break; + case 4: + mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, &mountdata); + break; + default: + BUG(); + } +free_page: + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c new file mode 100644 index 00000000000..a155505c36f --- /dev/null +++ b/fs/nfs/namespace.c @@ -0,0 +1,89 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust + * + * NFS namespace + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. 
+ */ +static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + BUG_ON(IS_ROOT(dentry)); + dprintk("%s: enter\n", __FUNCTION__); + dput(nd->dentry); + nd->dentry = dget(dentry); + if (d_mountpoint(nd->dentry)) + goto out_follow; + /* Look it up again */ + parent = dget_parent(nd->dentry); + err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) + goto out_err; + + mntget(mnt); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, NULL); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + mntput(nd->mnt); + dput(nd->dentry); + nd->mnt = mnt; + nd->dentry = dget(mnt->mnt_root); +out: + dprintk("%s: done, returned %d\n", __FUNCTION__, err); + return ERR_PTR(err); +out_err: + path_release(nd); + goto out; +out_follow: + while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; + err = 0; + goto out; +} + +struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0f5e4e7cdde..307832fd1a4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -217,6 +217,7 @@ extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern struct nfs4_state_recovery_ops 
nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef4c6cccf95..308407205e6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1331,7 +1331,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f return status; } -static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_exception exception = { }; int err; -- cgit v1.2.3-18-g5258 From 51d8fa6a109589d522c18a8e9bf3fb167a91b1bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:20 -0400 Subject: NFS: Add timeout to submounts Make automounted partitions expire using the mark_mounts_for_expiry() function. The timeout is controlled via a sysctl. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 +++ fs/nfs/namespace.c | 25 ++++++++++++++++++++++++- fs/nfs/sysctl.c | 10 ++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 47167ab64f5..3eea556d8f5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -167,6 +167,7 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) struct nfs_server *server; struct rpc_clnt *rpc; + shrink_submounts(vfsmnt, &nfs_automount_list); if (!(flags & MNT_FORCE)) return; /* -EIO all pending I/O */ @@ -1943,6 +1944,7 @@ static void nfs_kill_super(struct super_block *s) nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs_fs_type = { @@ -2288,6 +2290,7 @@ static void nfs4_kill_super(struct super_block *sb) nfs_free_iostats(server->io_stats); kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs4_fs_type = { diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 
a155505c36f..e426516c111 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -18,6 +18,11 @@ #define NFSDBG_FACILITY NFSDBG_VFS +LIST_HEAD(nfs_automount_list); +static void nfs_expire_automounts(void *list); +static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + /* * nfs_follow_mountpoint - handle crossing a mountpoint on the server * @dentry - dentry of mountpoint @@ -59,7 +64,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_err; mntget(mnt); - err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, NULL); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); if (err < 0) { mntput(mnt); if (err == -EBUSY) @@ -70,6 +75,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) dput(nd->dentry); nd->mnt = mnt; nd->dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); out: dprintk("%s: done, returned %d\n", __FUNCTION__, err); return ERR_PTR(err); @@ -87,3 +93,20 @@ struct inode_operations nfs_mountpoint_inode_operations = { .follow_link = nfs_follow_mountpoint, .getattr = nfs_getattr, }; + +static void nfs_expire_automounts(void *data) +{ + struct list_head *list = (struct list_head *)data; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) { + cancel_delayed_work(&nfs_automount_task); + flush_scheduled_work(); + } +} diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 4c486eb867c..db61e51bb15 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "callback.h" @@ -46,6 +47,15 @@ static ctl_table nfs_cb_sysctls[] = { .strategy = &sysctl_jiffies, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = 
"nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, { .ctl_name = 0 } }; -- cgit v1.2.3-18-g5258 From 683b57b435326eb512c7305892683b6205669448 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:22 -0400 Subject: NFSv4: Implement the fs_locations function call NFSv4 allows for the fact that filesystems may be replicated across several servers or that they may be migrated to a backup server in case of failure of the primary server. fs_locations is an NFSv4 operation for retrieving information about the location of migrated and/or replicated filesystems. Based on an initial implementation by Jiaying Zhang Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 + fs/nfs/nfs4proc.c | 29 ++++++++++++++ fs/nfs/nfs4xdr.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 307832fd1a4..5b765117121 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -218,6 +218,8 @@ extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 308407205e6..768514dc0c4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3570,6 +3570,35 @@ ssize_t 
nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) return len; } +int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page) +{ + struct nfs_server *server = NFS_SERVER(dir); + u32 bitmask[2] = { + [0] = server->attr_bitmask[0] | FATTR4_WORD0_FS_LOCATIONS, + [1] = server->attr_bitmask[1], + }; + struct nfs4_fs_locations_arg args = { + .dir_fh = NFS_FH(dir), + .name = &dentry->d_name, + .page = page, + .bitmask = bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &fs_locations, + }; + int status; + + dprintk("%s: start\n", __FUNCTION__); + fs_locations->fattr.valid = 0; + fs_locations->server = server; + status = rpc_call_sync(server->client, &msg, 0); + dprintk("%s: returned status = %d\n", __FUNCTION__, status); + return status; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0d579467594..7add3137b6b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -411,6 +411,15 @@ static int nfs_stat_to_errno(int); #define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define NFS4_enc_fs_locations_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_fs_locations_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -2002,6 +2011,38 @@ out: return status; } +/* + * Encode FS_LOCATIONS request + */ +static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + 
int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_lookup(&xdr, args->name)) != 0) + goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + /* set up reply + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_LOOKUP + status + OP_GETATTR + status = 7 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + 0, PAGE_SIZE); +out: + return status; +} + /* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" @@ -2036,7 +2077,7 @@ out: } \ } while (0) -static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { uint32_t *p; @@ -2087,7 +2128,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) { uint32_t *p; - uint32_t strlen; + unsigned int strlen; char *str; READ_BUF(12); @@ -2336,6 +2377,45 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin return status; } +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fs_locations *res) +{ + int n; + uint32_t *p; + int status = -EIO; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U))) + goto out; + status = 0; + if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) + goto out; + status = decode_opaque_inline(xdr, &res->fs_pathlen, &res->fs_path); + if (unlikely(status != 0)) + goto out; + READ_BUF(4); + READ32(n); + if (n <= 0) + goto out_eio; + res->nlocations = 0; + while (res->nlocations < n) { + struct nfs_fs_location *loc = &res->locations[res->nlocations]; + + status = decode_opaque_inline(xdr, &loc->serverlen, 
&loc->server); + if (unlikely(status != 0)) + goto out_eio; + status = decode_opaque_inline(xdr, &loc->rootpathlen, &loc->rootpath); + if (unlikely(status != 0)) + goto out_eio; + if (res->nlocations < NFS_FS_LOCATIONS_MAXENTRIES) + res->nlocations++; + } +out: + dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status); + return status; +out_eio: + status = -EIO; + goto out; +} + static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2867,6 +2947,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; + if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs_fs_locations, + fattr))) != 0) + goto xdr_error; if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) goto xdr_error; fattr->mode |= fmode; @@ -4210,6 +4294,29 @@ out: return status; } +/* + * FS_LOCATIONS request + */ +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs_fs_locations *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + xdr_enter_page(&xdr, PAGE_SIZE); + status = decode_getfattr(&xdr, &res->fattr, res->server); +out: + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4381,6 +4488,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), PROC(GETACL, enc_getacl, dec_getacl), PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), }; struct rpc_version nfs_version4 = { -- cgit v1.2.3-18-g5258 From 
7aaa0b3bd4d215d9ce4d62b6c2043a63ba650f93 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:23 -0400 Subject: NFSv4: convert fs-locations-components to conform to RFC3530 Use component4-style formats for decoding list of servers and pathnames in fs_locations. Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 +-- fs/nfs/nfs4xdr.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5b765117121..22a5f838ea5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -219,7 +219,7 @@ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct n extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, - struct nfs_fs_locations *fs_locations, struct page *page); + struct nfs4_fs_locations *fs_locations, struct page *page); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 768514dc0c4..043223a0eda 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3571,7 +3571,7 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) } int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, - struct nfs_fs_locations *fs_locations, struct page *page) + struct nfs4_fs_locations *fs_locations, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); u32 bitmask[2] = { @@ -3587,7 +3587,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], .rpc_argp = &args, - .rpc_resp = 
&fs_locations, + .rpc_resp = fs_locations, }; int status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7add3137b6b..f6a1ea7df37 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2377,7 +2377,43 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin return status; } -static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fs_locations *res) +static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) +{ + int n; + uint32_t *p; + int status = 0; + + READ_BUF(4); + READ32(n); + if (n <= 0) + goto out_eio; + dprintk("path "); + path->ncomponents = 0; + while (path->ncomponents < n) { + struct nfs4_string *component = &path->components[path->ncomponents]; + status = decode_opaque_inline(xdr, &component->len, &component->data); + if (unlikely(status != 0)) + goto out_eio; + if (path->ncomponents != n) + dprintk("/"); + dprintk("%s", component->data); + if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) + path->ncomponents++; + else { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + } +out: + dprintk("\n"); + return status; +out_eio: + dprintk(" status %d", status); + status = -EIO; + goto out; +} + +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) { int n; uint32_t *p; @@ -2388,7 +2424,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; - status = decode_opaque_inline(xdr, &res->fs_pathlen, &res->fs_path); + dprintk("%s: fsroot ", __FUNCTION__); + status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) goto out; READ_BUF(4); @@ -2397,15 +2434,40 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st goto out_eio; res->nlocations = 0; while (res->nlocations < n) { - struct nfs_fs_location *loc = &res->locations[res->nlocations]; + 
int m; + struct nfs4_fs_location *loc = &res->locations[res->nlocations]; - status = decode_opaque_inline(xdr, &loc->serverlen, &loc->server); - if (unlikely(status != 0)) + READ_BUF(4); + READ32(m); + if (m <= 0) goto out_eio; - status = decode_opaque_inline(xdr, &loc->rootpathlen, &loc->rootpath); + + loc->nservers = 0; + dprintk("%s: servers ", __FUNCTION__); + while (loc->nservers < m) { + struct nfs4_string *server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, &server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); + if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) + loc->nservers++; + else { + int i; + dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); + for (i = loc->nservers; i < m; i++) { + int len; + char *data; + status = decode_opaque_inline(xdr, &len, &data); + if (unlikely(status != 0)) + goto out_eio; + } + } + } + status = decode_pathname(xdr, &loc->rootpath); if (unlikely(status != 0)) goto out_eio; - if (res->nlocations < NFS_FS_LOCATIONS_MAXENTRIES) + if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) res->nlocations++; } out: @@ -2948,7 +3010,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, - struct nfs_fs_locations, + struct nfs4_fs_locations, fattr))) != 0) goto xdr_error; if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) @@ -4297,7 +4359,7 @@ out: /* * FS_LOCATIONS request */ -static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs_fs_locations *res) +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations *res) { struct xdr_stream xdr; struct compound_hdr hdr; -- cgit v1.2.3-18-g5258 From 
99baf625d3b9b8944920acc7c2d06079a37458c5 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:24 -0400 Subject: NFSv4: Decode mounted_on_fileid attribute in getattr. It is ignored if fileid is also requested. This will be used on referrals (fs_locations). Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f6a1ea7df37..4b6613f6134 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2326,6 +2326,22 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t return 0; } +static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) +{ + uint32_t *p; + + *fileid = 0; + if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) + return -EIO; + if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { + READ_BUF(8); + READ64(*fileid); + bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + } + dprintk("%s: fileid=%Lu\n", __FUNCTION__, (unsigned long long)*fileid); + return 0; +} + static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2983,6 +2999,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons bitmap[2] = {0}, type; int status, fmode = 0; + uint64_t fileid; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto xdr_error; @@ -3032,6 +3049,10 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) goto xdr_error; + if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0) + goto xdr_error; + if (fattr->fileid == 0 && fileid != 0) + fattr->fileid = fileid; if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; xdr_error: -- cgit 
v1.2.3-18-g5258 From 361e624f6d8bfbeac53769603d995d47535cfd46 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:24 -0400 Subject: NFSv4: GETATTR attributes on referral Per referral draft, only fs_locations, fsid, and mounted_on_fileid can be requested in a GETATTR on referrals. Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 043223a0eda..8640607d6a0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3575,8 +3575,8 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, { struct nfs_server *server = NFS_SERVER(dir); u32 bitmask[2] = { - [0] = server->attr_bitmask[0] | FATTR4_WORD0_FS_LOCATIONS, - [1] = server->attr_bitmask[1], + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + [1] = FATTR4_WORD1_MOUNTED_ON_FILEID, }; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), -- cgit v1.2.3-18-g5258 From 830b8e33fe1900b87c8eb7ec5c646117a9f298d6 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:25 -0400 Subject: NFSv4: Define an fs_locations bitmap This bitmap is similar to the getattr bitmap, but also includes the fs_locations and mounted_on_fileid attributes. Use this bitmap for encoding in fs_locations requests. Note: We can probably do better by requesting locations as part of fsinfo itself. 
Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 20 ++++++++++++++++++++ fs/nfs/nfs4xdr.c | 11 +++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 22a5f838ea5..9a102860df3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -228,6 +228,7 @@ extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[2]; +extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ extern void nfs4_schedule_state_renewal(struct nfs4_client *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8640607d6a0..90ee21a07b3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -121,6 +121,25 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE 0 }; +const u32 nfs4_fs_locations_bitmap[2] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID + | FATTR4_WORD0_FS_LOCATIONS, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY + | FATTR4_WORD1_MOUNTED_ON_FILEID +}; + static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { @@ -3594,6 +3613,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, dprintk("%s: start\n", __FUNCTION__); fs_locations->fattr.valid = 0; fs_locations->server = server; + fs_locations->nlocations = 0; status = rpc_call_sync(server->client, &msg, 0); dprintk("%s: returned status = %d\n", __FUNCTION__, status); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4b6613f6134..646f16da072 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -731,6 +731,13 @@ static int 
encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) bitmask[1] & nfs4_fsinfo_bitmap[1]); } +static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask) +{ + return encode_getattr_two(xdr, + bitmask[0] & nfs4_fs_locations_bitmap[0], + bitmask[1] & nfs4_fs_locations_bitmap[1]); +} + static int encode_getfh(struct xdr_stream *xdr) { uint32_t *p; @@ -2030,10 +2037,10 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct n goto out; if ((status = encode_lookup(&xdr, args->name)) != 0) goto out; - if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0) goto out; /* set up reply - * toplevel_status + taglen + rescount + OP_PUTFH + status + * toplevel_status + OP_PUTFH + status * + OP_LOOKUP + status + OP_GETATTR + status = 7 */ replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; -- cgit v1.2.3-18-g5258 From c818ba43f9ca2e8214412ab5f126b1f436c35098 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:26 -0400 Subject: NFSv4: Create NFSv4 transport and client Move existing code into a separate function so that it can be also used by referral code. 
Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 131 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3eea556d8f5..db62a5a7e4f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2027,62 +2027,24 @@ static void nfs4_clear_inode(struct inode *inode) } -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, + struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) { - struct nfs_server *server; - struct nfs4_client *clp = NULL; + struct nfs4_client *clp; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; int err = -EIO; - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - server->rpc_ops = &nfs_v4_clientops; - - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return -EIO; + return ERR_PTR(err); } /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - 
goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - down_write(&clp->cl_sem); if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(data->proto, &server->addr, &timeparms); + xprt = xprt_create_proto(proto, &server->addr, timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); err = PTR_ERR(xprt); @@ -2091,7 +2053,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); + server->rpc_ops->version, flavor); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); err = PTR_ERR(clnt); @@ -2108,43 +2070,96 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) - server->nfs4_state = clp; + server->nfs4_state = clp; up_write(&clp->cl_sem); clp = NULL; if (IS_ERR(clnt)) { - err = PTR_ERR(clnt); dprintk("%s: cannot create RPC client. 
Error = %d\n", __FUNCTION__, err); - return err; + return clnt; } - server->client = clnt; - if (server->nfs4_state->cl_idmap == NULL) { dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - if (clnt->cl_auth->au_flavor != authflavour) { + if (clnt->cl_auth->au_flavor != flavor) { struct rpc_auth *auth; - auth = rpcauth_create(authflavour, clnt); + auth = rpcauth_create(flavor, clnt); if (IS_ERR(auth)) { dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return PTR_ERR(auth); + return (struct rpc_clnt *)auth; + } + } + return clnt; + + out_fail: + if (clp) + nfs4_put_client(clp); + return ERR_PTR(err); +} + +static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +{ + struct nfs_server *server; + struct rpc_timeout timeparms; + rpc_authflavor_t authflavour; + int err = -EIO; + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + server = NFS_SB(sb); + if (data->rsize != 0) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize != 0) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps = NFS_CAP_ATOMIC_OPEN; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + server->rpc_ops = &nfs_v4_clientops; + + nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + + /* Now create transport and client */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } + if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { + err = -EFAULT; + goto out_fail; } } + 
server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); + if (IS_ERR(server->client)) { + err = PTR_ERR(server->client); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + sb->s_time_gran = 1; sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); - if (err == 0) - return 0; -out_fail: - if (clp) - nfs4_put_client(clp); + + out_fail: return err; } -- cgit v1.2.3-18-g5258 From 61f5164cab1f6fdf06871ea9d60fe2f912184078 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:27 -0400 Subject: NFS: Expand clone mounts to include other servers Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 111 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index db62a5a7e4f..ebdab885c47 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1717,14 +1717,13 @@ struct nfs_clone_mount { }; static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*clone_client)(struct nfs_server *, struct nfs_clone_mount *)) + struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), + struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *)) { struct nfs_server *server; struct nfs_server *parent = NFS_SB(data->sb); struct super_block *sb = ERR_PTR(-EINVAL); void *err = ERR_PTR(-ENOMEM); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; int len; server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); @@ -1736,53 +1735,17 @@ static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, if (server->hostname == NULL) goto free_server; memcpy(server->hostname, parent->hostname, len); - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); if (rpciod_up() != 0) goto free_hostname; - sb = clone_client(server, data); + sb = fill_sb(server, 
data); if (IS_ERR((err = sb)) || sb->s_root) goto kill_rpciod; - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - err = ERR_PTR(-ENOMEM); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) + server = fill_server(sb, data); + if (IS_ERR((err = server))) goto out_deactivate; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out_deactivate; - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out_deactivate; - } - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out_deactivate; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out_deactivate; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; return sb; -out_put_root: - iput(root_inode); out_deactivate: up_write(&sb->s_umount); deactivate_super(sb); @@ -1955,21 +1918,73 @@ static struct file_system_type nfs_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -static struct super_block *nfs_clone_client(struct nfs_server *server, struct nfs_clone_mount *data) +static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { struct super_block *sb; + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & 
NFS_MOUNT_NONLM)) lockd_up(); return sb; } +static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_server *parent = NFS_SB(data->sb); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + void *err = ERR_PTR(-ENOMEM); + + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto out; + + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out; + + if (!IS_ERR(parent->client_sys)) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out; + } + if (!IS_ERR(parent->client_acl)) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out; + } + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return server; +out_put_root: + iput(root_inode); +out: + return err; +} + static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_client); + return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server); } static struct file_system_type clone_nfs_fs_type = { @@ -2371,12 +2386,14 @@ static inline char *nfs4_dup_path(const struct dentry *dentry) return path; } -static struct 
super_block *nfs4_clone_client(struct nfs_server *server, struct nfs_clone_mount *data) +static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) { const struct dentry *dentry = data->dentry; struct nfs4_client *clp = server->nfs4_state; struct super_block *sb; + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); server->mnt_path = nfs4_dup_path(dentry); if (IS_ERR(server->mnt_path)) { sb = (struct super_block *)server->mnt_path; @@ -2403,12 +2420,12 @@ static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_client); + return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server); } static struct file_system_type clone_nfs4_fs_type = { .owner = THIS_MODULE, - .name = "nfs", + .name = "nfs4", .get_sb = nfs_clone_nfs4_sb, .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -- cgit v1.2.3-18-g5258 From 9cdb3883c38f883436a84c2353a4cf964ff890a2 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:28 -0400 Subject: NFSv4: Ensure client submounts when following a referral Set up mountpoint when hitting a referral on moved error by getting fs_locations. 
Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 267 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ebdab885c47..0d8302e59d6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -1714,6 +1716,10 @@ struct nfs_clone_mount { const struct dentry *dentry; struct nfs_fh *fh; struct nfs_fattr *fattr; + char *hostname; + char *mnt_path; + struct sockaddr_in *addr; + rpc_authflavor_t authflavor; }; static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, @@ -1724,17 +1730,19 @@ static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, struct nfs_server *parent = NFS_SB(data->sb); struct super_block *sb = ERR_PTR(-EINVAL); void *err = ERR_PTR(-ENOMEM); + char *hostname; int len; server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (server == NULL) goto out_err; memcpy(server, parent, sizeof(*server)); - len = strlen(parent->hostname) + 1; + hostname = (data->hostname != NULL) ? 
data->hostname : parent->hostname; + len = strlen(hostname) + 1; server->hostname = kmalloc(len, GFP_KERNEL); if (server->hostname == NULL) goto free_server; - memcpy(server->hostname, parent->hostname, len); + memcpy(server->hostname, hostname, len); if (rpciod_up() != 0) goto free_hostname; @@ -2458,7 +2466,8 @@ static inline void unregister_nfs4fs(void) nfs_unregister_sysctl(); } #else -#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL) +#define nfs4_fill_sb(a,b) ERR_PTR(-EINVAL) +#define nfs4_fill_super(a,b) ERR_PTR(-EINVAL) #define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) @@ -2521,6 +2530,261 @@ out: return mnt; } +/* Check if fs_root is valid */ +static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) +{ + char *end = buffer + buflen; + int n; + + *--end = '\0'; + buflen--; + + n = pathname->ncomponents; + while (--n >= 0) { + struct nfs4_string *component = &pathname->components[n]; + buflen -= component->len + 1; + if (buflen < 0) + goto Elong; + end -= component->len; + memcpy(end, component->data, component->len); + *--end = '/'; + } + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + +/* Check if the string represents a "valid" IPv4 address */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + +static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb = ERR_PTR(-ENOMEM); + int len; + + len = strlen(data->mnt_path) + 1; + server->mnt_path = kmalloc(len, GFP_KERNEL); + if (server->mnt_path == NULL) + goto err; + memcpy(server->mnt_path, data->mnt_path, len); + memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); + + sb = sget(&nfs4_fs_type, nfs4_compare_super, 
nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct rpc_timeout timeparms; + int proto, timeo, retrans; + void *err; + + proto = IPPROTO_TCP; + /* Since we are following a referral and there may be alternatives, + set the timeouts and retries to low values */ + timeo = 2; + retrans = 1; + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + + server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); + if (IS_ERR((err = server->client))) + goto out_err; + + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); + if (!IS_ERR(err)) + return server; +out_err: + return (struct nfs_server *)err; +} + +static struct super_block *nfs_referral_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server); +} + +static struct file_system_type nfs_referral_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs_referral_nfs4_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +/** + * nfs_follow_referral - set up mountpoint when hitting a referral on moved error + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fspath - fs path returned in fs_locations + * @mntpath - mount path to new server + * @hostname - hostname of new server + * @addr - host addr of new server + * + */ +struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs4_fs_locations *locations) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct 
nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, + }; + char *page, *page2; + char *path, *fs_path; + char *devname; + int loc, s; + + if (locations == NULL || locations->nlocations <= 0) + goto out; + + dprintk("%s: referral at %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* Ensure fs path is a prefix of current dentry path */ + page = (char *) __get_free_page(GFP_USER); + if (page == NULL) + goto out; + page2 = (char *) __get_free_page(GFP_USER); + if (page2 == NULL) + goto out; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + goto out_free; + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + goto out_free; + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); + goto out_free; + } + + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(devname)) { + mnt = (struct vfsmount *)devname; + goto out_free; + } + + loc = 0; + while (loc < locations->nlocations && IS_ERR(mnt)) { + struct nfs4_fs_location *location = &locations->locations[loc]; + char *mnt_path; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) { + loc++; + continue; + } + + mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); + if (IS_ERR(mnt_path)) { + loc++; + continue; + } + mountdata.mnt_path = mnt_path; + + s = 0; + while (s < location->nservers) { + struct sockaddr_in addr = {}; + + if (location->servers[s].len <= 0 || + valid_ipaddr4(location->servers[s].data) < 0) { + s++; + continue; + } + + mountdata.hostname = location->servers[s].data; + addr.sin_addr.s_addr = in_aton(mountdata.hostname); + addr.sin_family = AF_INET; + addr.sin_port = htons(NFS_PORT); + mountdata.addr = &addr; + + mnt = 
vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + if (!IS_ERR(mnt)) { + break; + } + s++; + } + loc++; + } + +out_free: + free_page((unsigned long)page); + free_page((unsigned long)page2); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + +/* + * nfs_do_refmount - handle crossing a referral on server + * @dentry - dentry of referral + * @nd - nameidata info + * + */ +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct dentry *parent; + struct nfs4_fs_locations *fs_locations = NULL; + struct page *page; + int err; + + /* BUG_ON(IS_ROOT(dentry)); */ + dprintk("%s: enter\n", __FUNCTION__); + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + + fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (fs_locations == NULL) + goto out_free; + + /* Get locations */ + parent = dget_parent(dentry); + dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); + dput(parent); + if (err != 0 || fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) + goto out_free; + + mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); +out_free: + __free_page(page); + kfree(fs_locations); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); -- cgit v1.2.3-18-g5258 From 6b97fd3da1eab2cc490cfe884c7d4956522eaf8b Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Fri, 9 Jun 2006 09:34:29 -0400 Subject: NFSv4: Follow a referral Respond to a moved error on NFS lookup by setting up the referral. 
Note: We don't actually follow the referral during lookup/getattr, but later when we detect fsid mismatch in inode revalidation (similar to the processing done for cloning submounts). Referrals will have fake attributes until they are actually followed or traversed. Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 ++++- fs/nfs/namespace.c | 9 ++++++++- fs/nfs/nfs4proc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0d8302e59d6..ee13cb01b56 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -888,7 +888,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); /* Deal with crossing mountpoints */ if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { - inode->i_op = &nfs_mountpoint_inode_operations; + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + inode->i_op = &nfs_referral_inode_operations; + else + inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; } } else if (S_ISLNK(inode->i_mode)) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e426516c111..8ca44b7b25c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -58,7 +58,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) if (err != 0) goto out_err; - mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) + mnt = nfs_do_refmount(nd->mnt, nd->dentry); + else + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); err = PTR_ERR(mnt); if (IS_ERR(mnt)) goto out_err; @@ -94,6 +97,10 @@ struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, }; +struct inode_operations nfs_referral_inode_operations = { + .follow_link = nfs_follow_mountpoint, +}; + static void nfs_expire_automounts(void *data) { struct list_head *list = (struct list_head 
*)data; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 90ee21a07b3..3300e35d74a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1462,6 +1462,50 @@ out: return nfs4_map_errors(status); } +/* + * Get locations and (maybe) other attributes of a referral. + * Note that we'll actually follow the referral later when + * we detect fsid mismatch in inode revalidation + */ +static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle) +{ + int status = -ENOMEM; + struct page *page = NULL; + struct nfs4_fs_locations *locations = NULL; + struct dentry dentry = {}; + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (locations == NULL) + goto out; + + dentry.d_name.name = name->name; + dentry.d_name.len = name->len; + status = nfs4_proc_fs_locations(dir, &dentry, locations, page); + if (status != 0) + goto out; + /* Make sure server returned a different fsid for the referral */ + if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { + dprintk("%s: server did not return a different fsid for a referral at %s\n", __FUNCTION__, name->name); + status = -EIO; + goto out; + } + + memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL; + if (!fattr->mode) + fattr->mode = S_IFDIR; + memset(fhandle, 0, sizeof(struct nfs_fh)); +out: + if (page) + __free_page(page); + if (locations) + kfree(locations); + return status; +} + static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs4_getattr_arg args = { @@ -1566,6 +1610,8 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, dprintk("NFS call lookup %s\n", name->name); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + if (status == -NFS4ERR_MOVED) + status = nfs4_get_referral(dir, name, fattr, fhandle); dprintk("NFS reply lookup: %d\n", 
status); return status; } -- cgit v1.2.3-18-g5258 From 33a43f2802d8d7be3a9b541785c4ca9ad79e4310 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Jun 2006 09:34:30 -0400 Subject: NFSv4: A root pathname is sent as a zero component4 Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 646f16da072..1750d996f49 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2408,8 +2408,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) READ_BUF(4); READ32(n); - if (n <= 0) + if (n < 0) goto out_eio; + if (n == 0) + goto root_path; dprintk("path "); path->ncomponents = 0; while (path->ncomponents < n) { @@ -2430,6 +2432,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) out: dprintk("\n"); return status; +root_path: +/* a root pathname is sent as a zero component4 */ + path->ncomponents = 1; + path->components[0].len=0; + path->components[0].data=NULL; + dprintk("path /\n"); + goto out; out_eio: dprintk(" status %d", status); status = -EIO; -- cgit v1.2.3-18-g5258 From 87e4ba1a62af8e05ee3e8f8aaca622714386ffb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:30 -0400 Subject: NFSv4: Ensure that referral mounts bind to a reserved port Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ee13cb01b56..648f593de0f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2078,6 +2078,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, __FUNCTION__, err); goto out_fail; } + /* Bind to a reserved port! 
*/ + xprt->resvport = 1; clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, flavor); if (IS_ERR(clnt)) { -- cgit v1.2.3-18-g5258 From 860de07139980afe9856cc31eb5efbf321bbcea4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:31 -0400 Subject: NFS: Fix compile errors introduced by referrals patches Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 648f593de0f..550a84dd41a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2444,6 +2444,20 @@ static struct file_system_type clone_nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, struct nfs_clone_mount *mountdata) +{ + struct vfsmount *mnt = NULL; + switch (server->rpc_ops->version) { + case 2: + case 3: + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + break; + case 4: + mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + } + return mnt; +} + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -2477,6 +2491,10 @@ static inline void unregister_nfs4fs(void) do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, struct nfs_clone_mount *mountdata) +{ + return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); +} #endif static inline char *nfs_devname(const struct vfsmount *mnt_parent, @@ -2517,17 +2535,7 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, mnt = (struct vfsmount *)devname; if (IS_ERR(devname)) goto free_page; - switch (NFS_SB(mnt_parent->mnt_sb)->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); - break; - case 4: - 
mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, &mountdata); - break; - default: - BUG(); - } + mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); free_page: free_page((unsigned long)page); out: @@ -2535,6 +2543,7 @@ out: return mnt; } +#ifdef CONFIG_NFS_V4 /* Check if fs_root is valid */ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) { @@ -2789,6 +2798,12 @@ out: dprintk("%s: done\n", __FUNCTION__); return mnt; } +#else +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + return ERR_PTR(-ENOENT); +} +#endif extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); -- cgit v1.2.3-18-g5258 From 4e5ccf60c5aa79d325c123f47d288a068166f389 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:32 -0400 Subject: NFS: Fix typo in nfs_do_clone_mount() Doh! Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 550a84dd41a..7ab2b38a990 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2450,7 +2450,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devn switch (server->rpc_ops->version) { case 2: case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); break; case 4: mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); @@ -2493,7 +2493,7 @@ static inline void unregister_nfs4fs(void) #define unregister_nfs4fs() static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, struct nfs_clone_mount *mountdata) { - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); } #endif -- cgit v1.2.3-18-g5258 From f7b422b17ee5ee4920e8ae24a6ad04bf3481ce72 Mon Sep 17 
00:00:00 2001 From: David Howells Date: Fri, 9 Jun 2006 09:34:33 -0400 Subject: NFS: Split fs/nfs/inode.c As fs/nfs/inode.c is rather large, heterogeneous and unwieldy, the attached patch splits it up into a number of files: (*) fs/nfs/inode.c Strictly inode-specific functions. (*) fs/nfs/super.c Superblock management functions for NFS and NFS4, normal access, clones and referrals. The NFS4 superblock functions _could_ move out into a separate conditionally compiled file, but it's probably not worth it as there're so many common bits. (*) fs/nfs/namespace.c Some namespace-specific functions have been moved here. (*) fs/nfs/nfs4namespace.c NFS4-specific namespace functions (this could be merged into the previous file). This file is conditionally compiled. (*) fs/nfs/internal.h Inter-file declarations, plus a few simple utility functions moved from fs/nfs/inode.c. Additionally, all the in-.c-file externs have been moved here, and the files they were moved from now include this file. For the most part, the functions have not been changed; only some multiplexor functions have changed significantly. I've also: (*) Added some extra banner comments above some functions. (*) Rearranged the function order within the files to be more logical and better grouped (IMO), though someone may prefer a different order. (*) Reduced the number of #ifdefs in .c files. (*) Added missing __init and __exit directives. 
Signed-Off-By: David Howells --- fs/nfs/Makefile | 5 +- fs/nfs/callback.c | 2 - fs/nfs/direct.c | 6 +- fs/nfs/inode.c | 1793 +----------------------------------------------- fs/nfs/internal.h | 179 +++++ fs/nfs/namespace.c | 112 ++- fs/nfs/nfs2xdr.c | 2 - fs/nfs/nfs3proc.c | 5 +- fs/nfs/nfs3xdr.c | 3 +- fs/nfs/nfs4namespace.c | 201 ++++++ fs/nfs/nfs4proc.c | 2 - fs/nfs/pagelist.c | 4 +- fs/nfs/proc.c | 5 +- fs/nfs/read.c | 4 +- fs/nfs/super.c | 1468 +++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 4 +- 16 files changed, 1993 insertions(+), 1802 deletions(-) create mode 100644 fs/nfs/internal.h create mode 100644 fs/nfs/nfs4namespace.c create mode 100644 fs/nfs/super.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index d9d494cee38..0b572a0c196 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o -nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ +nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ proc.o read.o symlink.o unlink.o write.o \ namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o @@ -12,7 +12,8 @@ nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ - callback.o callback_xdr.o callback_proc.o + callback.o callback_xdr.o callback_proc.o \ + nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 90c95adc8c1..d53f8c6a9ec 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -182,8 +182,6 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) /* * Define NFS4 callback program */ -extern struct svc_version nfs4_callback_version1; - static struct svc_version *nfs4_callback_version[] = { [1] = &nfs4_callback_version1, }; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 3c72b0c0728..402005c35ab 100644 --- a/fs/nfs/direct.c 
+++ b/fs/nfs/direct.c @@ -892,7 +892,7 @@ out: * nfs_init_directcache - create a slab cache for nfs_direct_req structures * */ -int nfs_init_directcache(void) +int __init nfs_init_directcache(void) { nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", sizeof(struct nfs_direct_req), @@ -906,10 +906,10 @@ int nfs_init_directcache(void) } /** - * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures + * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures * */ -void nfs_destroy_directcache(void) +void __exit nfs_destroy_directcache(void) { if (kmem_cache_destroy(nfs_direct_cachep)) printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n"); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7ab2b38a990..24a7139d344 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -46,87 +46,17 @@ #include "callback.h" #include "delegation.h" #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 -/* Maximum number of readahead requests - * FIXME: this should really be a sysctl so that users may tune it to suit - * their needs. People that do NFS over a slow network, might for - * instance want to reduce it to something closer to 1 for improved - * interactive response. 
- */ -#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) - static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); -static struct inode *nfs_alloc_inode(struct super_block *sb); -static void nfs_destroy_inode(struct inode *); -static int nfs_write_inode(struct inode *,int); -static void nfs_clear_inode(struct inode *); -static void nfs_umount_begin(struct vfsmount *, int); -static int nfs_statfs(struct super_block *, struct kstatfs *); -static int nfs_show_options(struct seq_file *, struct vfsmount *); -static int nfs_show_stats(struct seq_file *, struct vfsmount *); static void nfs_zap_acl_cache(struct inode *); -static struct rpc_program nfs_program; - -static struct super_operations nfs_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .statfs = nfs_statfs, - .clear_inode = nfs_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_stats = nfs_show_stats, -}; - -/* - * RPC cruft for NFS - */ -static struct rpc_stat nfs_rpcstat = { - .program = &nfs_program -}; -static struct rpc_version * nfs_version[] = { - NULL, - NULL, - &nfs_version2, -#if defined(CONFIG_NFS_V3) - &nfs_version3, -#elif defined(CONFIG_NFS_V4) - NULL, -#endif -#if defined(CONFIG_NFS_V4) - &nfs_version4, -#endif -}; - -static struct rpc_program nfs_program = { - .name = "nfs", - .number = NFS_PROGRAM, - .nrvers = ARRAY_SIZE(nfs_version), - .version = nfs_version, - .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", -}; - -#ifdef CONFIG_NFS_V3_ACL -static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { - [3] = &nfsacl_version3, -}; - -struct rpc_program nfsacl_program = { - .name = "nfsacl", - .number = NFS_ACL_PROGRAM, - .nrvers = ARRAY_SIZE(nfsacl_version), - .version = nfsacl_version, - .stats = &nfsacl_rpcstat, -}; -#endif /* CONFIG_NFS_V3_ACL */ +static kmem_cache_t * 
nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) @@ -134,8 +64,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -static int -nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, int sync) { int flags = sync ? FLUSH_SYNC : 0; int ret; @@ -146,8 +75,7 @@ nfs_write_inode(struct inode *inode, int sync) return 0; } -static void -nfs_clear_inode(struct inode *inode) +void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct rpc_cred *cred; @@ -164,566 +92,6 @@ nfs_clear_inode(struct inode *inode) BUG_ON(atomic_read(&nfsi->data_updates) != 0); } -static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) -{ - struct nfs_server *server; - struct rpc_clnt *rpc; - - shrink_submounts(vfsmnt, &nfs_automount_list); - if (!(flags & MNT_FORCE)) - return; - /* -EIO all pending I/O */ - server = NFS_SB(vfsmnt->mnt_sb); - rpc = server->client; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); - rpc = server->client_acl; - if (!IS_ERR(rpc)) - rpc_killall_tasks(rpc); -} - - -static inline unsigned long -nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) -{ - /* make sure blocksize is a power of two */ - if ((bsize & (bsize - 1)) || nrbitsp) { - unsigned char nrbits; - - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) - ; - bsize = 1 << nrbits; - if (nrbitsp) - *nrbitsp = nrbits; - } - - return bsize; -} - -/* - * Calculate the number of 512byte blocks used. - */ -static inline unsigned long -nfs_calc_block_size(u64 tsize) -{ - loff_t used = (tsize + 511) >> 9; - return (used > ULONG_MAX) ? 
ULONG_MAX : used; -} - -/* - * Compute and set NFS server blocksize - */ -static inline unsigned long -nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) -{ - if (bsize < NFS_MIN_FILE_IO_SIZE) - bsize = NFS_DEF_FILE_IO_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_SIZE) - bsize = NFS_MAX_FILE_IO_SIZE; - - return nfs_block_bits(bsize, nrbitsp); -} - -static inline void -nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) -{ - sb->s_maxbytes = (loff_t)maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) - sb->s_maxbytes = MAX_LFS_FILESIZE; -} - -/* - * Obtain the root inode of the file system. - */ -static struct inode * -nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) -{ - struct nfs_server *server = NFS_SB(sb); - int error; - - error = server->rpc_ops->getroot(server, rootfh, fsinfo); - if (error < 0) { - dprintk("nfs_get_root: getattr error = %d\n", -error); - return ERR_PTR(error); - } - - server->fsid = fsinfo->fattr->fsid; - return nfs_fhget(sb, rootfh, fsinfo->fattr); -} - -/* - * Do NFS version-independent mount processing, and sanity checking - */ -static int -nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) -{ - struct nfs_server *server; - struct inode *root_inode; - struct nfs_fattr fattr; - struct nfs_fsinfo fsinfo = { - .fattr = &fattr, - }; - struct nfs_pathconf pathinfo = { - .fattr = &fattr, - }; - int no_root_error = 0; - unsigned long max_rpc_payload; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - return -ENOMEM; - - root_inode = nfs_get_root(sb, &server->fh, &fsinfo); - /* Did getting the root inode fail? 
*/ - if (IS_ERR(root_inode)) { - no_root_error = PTR_ERR(root_inode); - goto out_no_root; - } - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) { - no_root_error = -ENOMEM; - goto out_no_root; - } - sb->s_root->d_op = server->rpc_ops->dentry_ops; - - /* mount time stamp, in seconds */ - server->mount_time = jiffies; - - /* Get some general file system info */ - if (server->namelen == 0 && - server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; - /* Work out a lot of parameters */ - if (server->rsize == 0) - server->rsize = nfs_block_size(fsinfo.rtpref, NULL); - if (server->wsize == 0) - server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - - if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) - server->rsize = nfs_block_size(fsinfo.rtmax, NULL); - if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) - server->wsize = nfs_block_size(fsinfo.wtmax, NULL); - - max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); - if (server->rsize > max_rpc_payload) - server->rsize = max_rpc_payload; - if (server->rsize > NFS_MAX_FILE_IO_SIZE) - server->rsize = NFS_MAX_FILE_IO_SIZE; - server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - if (server->wsize > NFS_MAX_FILE_IO_SIZE) - server->wsize = NFS_MAX_FILE_IO_SIZE; - server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); - - server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); - if (server->dtsize > PAGE_CACHE_SIZE) - server->dtsize = PAGE_CACHE_SIZE; - if (server->dtsize > server->rsize) - server->dtsize = server->rsize; - - if (server->flags & NFS_MOUNT_NOAC) { - server->acregmin = server->acregmax = 0; - server->acdirmin = server->acdirmax = 0; - sb->s_flags 
|= MS_SYNCHRONOUS; - } - server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - - server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; - - /* We're airborne Set socket buffersize */ - rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); - return 0; - /* Yargs. It didn't work out. */ -out_no_root: - dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); - if (!IS_ERR(root_inode)) - iput(root_inode); - return no_root_error; -} - -static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) -{ - to->to_initval = timeo * HZ / 10; - to->to_retries = retrans; - if (!to->to_retries) - to->to_retries = 2; - - switch (proto) { - case IPPROTO_TCP: - if (!to->to_initval) - to->to_initval = 60 * HZ; - if (to->to_initval > NFS_MAX_TCP_TIMEOUT) - to->to_initval = NFS_MAX_TCP_TIMEOUT; - to->to_increment = to->to_initval; - to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); - to->to_exponential = 0; - break; - case IPPROTO_UDP: - default: - if (!to->to_initval) - to->to_initval = 11 * HZ / 10; - if (to->to_initval > NFS_MAX_UDP_TIMEOUT) - to->to_initval = NFS_MAX_UDP_TIMEOUT; - to->to_maxval = NFS_MAX_UDP_TIMEOUT; - to->to_exponential = 1; - break; - } -} - -/* - * Create an RPC client handle. - */ -static struct rpc_clnt * -nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) -{ - struct rpc_timeout timeparms; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; - - nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (IS_ERR(xprt)) { - dprintk("%s: cannot create RPC transport. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - return (struct rpc_clnt *)xprt; - } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, data->pseudoflavor); - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. Error = %ld\n", - __FUNCTION__, PTR_ERR(xprt)); - goto out_fail; - } - - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - - return clnt; - -out_fail: - return clnt; -} - -/* - * The way this works is that the mount process passes a structure - * in the data argument which contains the server's IP address - * and the root file handle obtained from the server's mount - * daemon. We stash these away in the private superblock fields. 
- */ -static int -nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) -{ - struct nfs_server *server; - rpc_authflavor_t authflavor; - - server = NFS_SB(sb); - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - if (data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - /* Start lockd here, before we might error out */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - - server->namelen = data->namlen; - server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); - if (!server->hostname) - return -ENOMEM; - strcpy(server->hostname, data->hostname); - - /* Check NFS protocol revision and initialize RPC op vector - * and file handle pool. 
*/ -#ifdef CONFIG_NFS_V3 - if (server->flags & NFS_MOUNT_VER3) { - server->rpc_ops = &nfs_v3_clientops; - server->caps |= NFS_CAP_READDIRPLUS; - } else { - server->rpc_ops = &nfs_v2_clientops; - } -#else - server->rpc_ops = &nfs_v2_clientops; -#endif - - /* Fill in pseudoflavor for mount version < 5 */ - if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) - data->pseudoflavor = RPC_AUTH_UNIX; - authflavor = data->pseudoflavor; /* save for sb_init() */ - /* XXX maybe we want to add a server->pseudoflavor field */ - - /* Create RPC client handles */ - server->client = nfs_create_client(server, data); - if (IS_ERR(server->client)) - return PTR_ERR(server->client); - /* RFC 2623, sec 2.3.2 */ - if (authflavor != RPC_AUTH_UNIX) { - struct rpc_auth *auth; - - server->client_sys = rpc_clone_client(server->client); - if (IS_ERR(server->client_sys)) - return PTR_ERR(server->client_sys); - auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); - if (IS_ERR(auth)) - return PTR_ERR(auth); - } else { - atomic_inc(&server->client->cl_count); - server->client_sys = server->client; - } - if (server->flags & NFS_MOUNT_VER3) { -#ifdef CONFIG_NFS_V3_ACL - if (!(server->flags & NFS_MOUNT_NOACL)) { - server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); - /* No errors! Assume that Sun nfsacls are supported */ - if (!IS_ERR(server->client_acl)) - server->caps |= NFS_CAP_ACLS; - } -#else - server->flags &= ~NFS_MOUNT_NOACL; -#endif /* CONFIG_NFS_V3_ACL */ - /* - * The VFS shouldn't apply the umask to mode bits. We will - * do so ourselves when necessary. 
- */ - sb->s_flags |= MS_POSIXACL; - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; - sb->s_time_gran = 1; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; - } - - sb->s_op = &nfs_sops; - return nfs_sb_init(sb, authflavor); -} - -static int -nfs_statfs(struct super_block *sb, struct kstatfs *buf) -{ - struct nfs_server *server = NFS_SB(sb); - unsigned char blockbits; - unsigned long blockres; - struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); - struct nfs_fattr fattr; - struct nfs_fsstat res = { - .fattr = &fattr, - }; - int error; - - lock_kernel(); - - error = server->rpc_ops->statfs(server, rootfh, &res); - buf->f_type = NFS_SUPER_MAGIC; - if (error < 0) - goto out_err; - - /* - * Current versions of glibc do not correctly handle the - * case where f_frsize != f_bsize. Eventually we want to - * report the value of wtmult in this field. - */ - buf->f_frsize = sb->s_blocksize; - - /* - * On most *nix systems, f_blocks, f_bfree, and f_bavail - * are reported in units of f_frsize. Linux hasn't had - * an f_frsize field in its statfs struct until recently, - * thus historically Linux's sys_statfs reports these - * fields in units of f_bsize. 
- */ - buf->f_bsize = sb->s_blocksize; - blockbits = sb->s_blocksize_bits; - blockres = (1 << blockbits) - 1; - buf->f_blocks = (res.tbytes + blockres) >> blockbits; - buf->f_bfree = (res.fbytes + blockres) >> blockbits; - buf->f_bavail = (res.abytes + blockres) >> blockbits; - - buf->f_files = res.tfiles; - buf->f_ffree = res.afiles; - - buf->f_namelen = server->namelen; - out: - unlock_kernel(); - return 0; - - out_err: - dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); - buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; - goto out; - -} - -static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) -{ - static struct proc_nfs_info { - int flag; - char *str; - char *nostr; - } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", "" }, - { NFS_MOUNT_NOACL, ",noacl", "" }, - { 0, NULL, NULL } - }; - struct proc_nfs_info *nfs_infop; - char buf[12]; - char *proto; - - seq_printf(m, ",vers=%d", nfss->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ || showdefaults) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ || showdefaults) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ || showdefaults) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ || showdefaults) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) - seq_puts(m, nfs_infop->str); - else - seq_puts(m, nfs_infop->nostr); - } - switch (nfss->client->cl_xprt->prot) { - case IPPROTO_TCP: - proto = "tcp"; - break; - case IPPROTO_UDP: - proto = "udp"; - break; - default: - snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); - 
proto = buf; - } - seq_printf(m, ",proto=%s", proto); - seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); - seq_printf(m, ",retrans=%u", nfss->retrans_count); -} - -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) -{ - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - - nfs_show_mount_options(m, nfss, 0); - - seq_puts(m, ",addr="); - seq_escape(m, nfss->hostname, " \t\n\\"); - - return 0; -} - -static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) -{ - int i, cpu; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - struct rpc_auth *auth = nfss->client->cl_auth; - struct nfs_iostats totals = { }; - - seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); - - /* - * Display all mount option settings - */ - seq_printf(m, "\n\topts:\t"); - seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); - seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); - seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); - seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? 
",nodiratime" : ""); - nfs_show_mount_options(m, nfss, 1); - - seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); - - seq_printf(m, "\n\tcaps:\t"); - seq_printf(m, "caps=0x%x", nfss->caps); - seq_printf(m, ",wtmult=%d", nfss->wtmult); - seq_printf(m, ",dtsize=%d", nfss->dtsize); - seq_printf(m, ",bsize=%d", nfss->bsize); - seq_printf(m, ",namelen=%d", nfss->namelen); - -#ifdef CONFIG_NFS_V4 - if (nfss->rpc_ops->version == 4) { - seq_printf(m, "\n\tnfsv4:\t"); - seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); - seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); - seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); - } -#endif - - /* - * Display security flavor in effect for this mount - */ - seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); - if (auth->au_flavor) - seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); - - /* - * Display superblock I/O counters - */ - for_each_possible_cpu(cpu) { - struct nfs_iostats *stats; - - preempt_disable(); - stats = per_cpu_ptr(nfss->io_stats, cpu); - - for (i = 0; i < __NFSIOS_COUNTSMAX; i++) - totals.events[i] += stats->events[i]; - for (i = 0; i < __NFSIOS_BYTESMAX; i++) - totals.bytes[i] += stats->bytes[i]; - - preempt_enable(); - } - - seq_printf(m, "\n\tevents:\t"); - for (i = 0; i < __NFSIOS_COUNTSMAX; i++) - seq_printf(m, "%lu ", totals.events[i]); - seq_printf(m, "\n\tbytes:\t"); - for (i = 0; i < __NFSIOS_BYTESMAX; i++) - seq_printf(m, "%Lu ", totals.bytes[i]); - seq_printf(m, "\n"); - - rpc_print_iostats(m, nfss->client); - - return 0; -} - /** * nfs_sync_mapping - helper to flush all mmapped dirty data to disk */ @@ -1663,371 +1031,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) goto out_err; } -/* - * File system information - */ - -/* - * nfs_path - reconstruct the path given an arbitrary dentry - * @base - arbitrary string to prepend to the path - * @dentry - pointer to dentry - * @buffer - result buffer - * @buflen - length of buffer - * - * Helper 
function for constructing the path from the - * root dentry to an arbitrary hashed dentry. - * - * This is mainly for use in figuring out the path on the - * server side when automounting on top of an existing partition. - */ -static char *nfs_path(const char *base, const struct dentry *dentry, - char *buffer, ssize_t buflen) -{ - char *end = buffer+buflen; - int namelen; - - *--end = '\0'; - buflen--; - spin_lock(&dcache_lock); - while (!IS_ROOT(dentry)) { - namelen = dentry->d_name.len; - buflen -= namelen + 1; - if (buflen < 0) - goto Elong; - end -= namelen; - memcpy(end, dentry->d_name.name, namelen); - *--end = '/'; - dentry = dentry->d_parent; - } - spin_unlock(&dcache_lock); - namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; - buflen -= namelen; - if (buflen < 0) - goto Elong; - end -= namelen; - memcpy(end, base, namelen); - return end; -Elong: - return ERR_PTR(-ENAMETOOLONG); -} - -struct nfs_clone_mount { - const struct super_block *sb; - const struct dentry *dentry; - struct nfs_fh *fh; - struct nfs_fattr *fattr; - char *hostname; - char *mnt_path; - struct sockaddr_in *addr; - rpc_authflavor_t authflavor; -}; - -static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, - struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), - struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *)) -{ - struct nfs_server *server; - struct nfs_server *parent = NFS_SB(data->sb); - struct super_block *sb = ERR_PTR(-EINVAL); - void *err = ERR_PTR(-ENOMEM); - char *hostname; - int len; - - server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (server == NULL) - goto out_err; - memcpy(server, parent, sizeof(*server)); - hostname = (data->hostname != NULL) ? 
data->hostname : parent->hostname; - len = strlen(hostname) + 1; - server->hostname = kmalloc(len, GFP_KERNEL); - if (server->hostname == NULL) - goto free_server; - memcpy(server->hostname, hostname, len); - if (rpciod_up() != 0) - goto free_hostname; - - sb = fill_sb(server, data); - if (IS_ERR((err = sb)) || sb->s_root) - goto kill_rpciod; - - server = fill_server(sb, data); - if (IS_ERR((err = server))) - goto out_deactivate; - return sb; -out_deactivate: - up_write(&sb->s_umount); - deactivate_super(sb); - return (struct super_block *)err; -kill_rpciod: - rpciod_down(); -free_hostname: - kfree(server->hostname); -free_server: - kfree(server); -out_err: - return (struct super_block *)err; -} - -static int nfs_set_super(struct super_block *s, void *data) -{ - s->s_fs_info = data; - return set_anon_super(s, data); -} - -static int nfs_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) - return 0; - if (old->addr.sin_port != server->addr.sin_port) - return 0; - return !nfs_compare_fh(&old->fh, &server->fh); -} - -static struct super_block *nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - int error; - struct nfs_server *server = NULL; - struct super_block *s; - struct nfs_fh *root; - struct nfs_mount_data *data = raw_data; - - s = ERR_PTR(-EINVAL); - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err; - } - if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err; - } - switch (data->version) { - case 1: - data->namlen = 0; - case 2: - data->bsize = 0; - case 3: - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: mount structure version %d does not support NFSv3\n", - __FUNCTION__, - data->version); - goto out_err; - } - data->root.size = NFS2_FHSIZE; - 
memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) { - dprintk("%s: mount structure version %d does not support strong security\n", - __FUNCTION__, - data->version); - goto out_err; - } - case 5: - memset(data->context, 0, sizeof(data->context)); - } -#ifndef CONFIG_NFS_V3 - /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - s = ERR_PTR(-EPROTONOSUPPORT); - if (data->flags & NFS_MOUNT_VER3) { - dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err; - } -#endif /* CONFIG_NFS_V3 */ - - s = ERR_PTR(-ENOMEM); - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - goto out_err; - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - root = &server->fh; - if (data->flags & NFS_MOUNT_VER3) - root->size = data->root.size; - else - root->size = NFS2_FHSIZE; - s = ERR_PTR(-EINVAL); - if (root->size > sizeof(root->data)) { - dprintk("%s: invalid root filehandle\n", __FUNCTION__); - goto out_err; - } - memcpy(root->data, data->root.data, root->size); - - /* We now require that the mount process passes the remote address */ - memcpy(&server->addr, &data->addr, sizeof(server->addr)); - if (server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote address!\n", - __FUNCTION__); - goto out_err; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_err; - } - - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - if (IS_ERR(s) || s->s_root) - goto out_rpciod_down; - - s->s_flags = flags; - - error = nfs_fill_super(s, data, flags & MS_SILENT ? 
1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_rpciod_down: - rpciod_down(); -out_err: - kfree(server); - return s; -} - -static void nfs_kill_super(struct super_block *s) -{ - struct nfs_server *server = NFS_SB(s); - - kill_anon_super(s); - - if (!IS_ERR(server->client)) - rpc_shutdown_client(server->client); - if (!IS_ERR(server->client_sys)) - rpc_shutdown_client(server->client_sys); - if (!IS_ERR(server->client_acl)) - rpc_shutdown_client(server->client_acl); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - - rpciod_down(); /* release rpciod */ - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); -} - -static struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .get_sb = nfs_get_sb, - .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); - if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return sb; -} - -static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - struct nfs_server *parent = NFS_SB(data->sb); - struct inode *root_inode; - struct nfs_fsinfo fsinfo; - void *err = ERR_PTR(-ENOMEM); - - sb->s_op = data->sb->s_op; - sb->s_blocksize = data->sb->s_blocksize; - sb->s_blocksize_bits = data->sb->s_blocksize_bits; - sb->s_maxbytes = data->sb->s_maxbytes; - - server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - server->io_stats = nfs_alloc_iostats(); - if (server->io_stats == NULL) - goto 
out; - - server->client = rpc_clone_client(parent->client); - if (IS_ERR((err = server->client))) - goto out; - - if (!IS_ERR(parent->client_sys)) { - server->client_sys = rpc_clone_client(parent->client_sys); - if (IS_ERR((err = server->client_sys))) - goto out; - } - if (!IS_ERR(parent->client_acl)) { - server->client_acl = rpc_clone_client(parent->client_acl); - if (IS_ERR((err = server->client_acl))) - goto out; - } - root_inode = nfs_fhget(sb, data->fh, data->fattr); - if (!root_inode) - goto out; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_put_root; - fsinfo.fattr = data->fattr; - if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) - nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); - sb->s_root->d_op = server->rpc_ops->dentry_ops; - sb->s_flags |= MS_ACTIVE; - return server; -out_put_root: - iput(root_inode); -out: - return err; -} - -static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server); -} - -static struct file_system_type clone_nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .get_sb = nfs_clone_nfs_sb, - .kill_sb = nfs_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; #ifdef CONFIG_NFS_V4 -static void nfs4_clear_inode(struct inode *); - - -static struct super_operations nfs4_sops = { - .alloc_inode = nfs_alloc_inode, - .destroy_inode = nfs_destroy_inode, - .write_inode = nfs_write_inode, - .statfs = nfs_statfs, - .clear_inode = nfs4_clear_inode, - .umount_begin = nfs_umount_begin, - .show_options = nfs_show_options, - .show_stats = nfs_show_stats, -}; - /* * Clean out any remaining NFSv4 state that might be left over due * to open() calls that passed nfs_atomic_lookup, but failed to call * nfs_open(). 
*/ -static void nfs4_clear_inode(struct inode *inode) +void nfs4_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2051,774 +1063,9 @@ static void nfs4_clear_inode(struct inode *inode) nfs4_close_state(state, state->state); } } - - -static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, - struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) -{ - struct nfs4_client *clp; - struct rpc_xprt *xprt = NULL; - struct rpc_clnt *clnt = NULL; - int err = -EIO; - - clp = nfs4_get_client(&server->addr.sin_addr); - if (!clp) { - dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); - return ERR_PTR(err); - } - - /* Now create transport and client */ - down_write(&clp->cl_sem); - if (IS_ERR(clp->cl_rpcclient)) { - xprt = xprt_create_proto(proto, &server->addr, timeparms); - if (IS_ERR(xprt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(xprt); - dprintk("%s: cannot create RPC transport. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - /* Bind to a reserved port! */ - xprt->resvport = 1; - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, flavor); - if (IS_ERR(clnt)) { - up_write(&clp->cl_sem); - err = PTR_ERR(clnt); - dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - clnt->cl_intr = 1; - clnt->cl_softrtry = 1; - clp->cl_rpcclient = clnt; - memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); - nfs_idmap_new(clp); - } - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - clnt = rpc_clone_client(clp->cl_rpcclient); - if (!IS_ERR(clnt)) - server->nfs4_state = clp; - up_write(&clp->cl_sem); - clp = NULL; - - if (IS_ERR(clnt)) { - dprintk("%s: cannot create RPC client. 
Error = %d\n", - __FUNCTION__, err); - return clnt; - } - - if (server->nfs4_state->cl_idmap == NULL) { - dprintk("%s: failed to create idmapper.\n", __FUNCTION__); - return ERR_PTR(-ENOMEM); - } - - if (clnt->cl_auth->au_flavor != flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(flavor, clnt); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return (struct rpc_clnt *)auth; - } - } - return clnt; - - out_fail: - if (clp) - nfs4_put_client(clp); - return ERR_PTR(err); -} - -static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) -{ - struct nfs_server *server; - struct rpc_timeout timeparms; - rpc_authflavor_t authflavour; - int err = -EIO; - - sb->s_blocksize_bits = 0; - sb->s_blocksize = 0; - server = NFS_SB(sb); - if (data->rsize != 0) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize != 0) - server->wsize = nfs_block_size(data->wsize, NULL); - server->flags = data->flags & NFS_MOUNT_FLAGMASK; - server->caps = NFS_CAP_ATOMIC_OPEN; - - server->acregmin = data->acregmin*HZ; - server->acregmax = data->acregmax*HZ; - server->acdirmin = data->acdirmin*HZ; - server->acdirmax = data->acdirmax*HZ; - - server->rpc_ops = &nfs_v4_clientops; - - nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); - - server->retrans_timeo = timeparms.to_initval; - server->retrans_count = timeparms.to_retries; - - /* Now create transport and client */ - authflavour = RPC_AUTH_UNIX; - if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen != 1) { - dprintk("%s: Invalid number of RPC auth flavours %d.\n", - __FUNCTION__, data->auth_flavourlen); - err = -EINVAL; - goto out_fail; - } - if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { - err = -EFAULT; - goto out_fail; - } - } - - server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); - if (IS_ERR(server->client)) { - err = PTR_ERR(server->client); - 
dprintk("%s: cannot create RPC client. Error = %d\n", - __FUNCTION__, err); - goto out_fail; - } - - sb->s_time_gran = 1; - - sb->s_op = &nfs4_sops; - err = nfs_sb_init(sb, authflavour); - - out_fail: - return err; -} - -static int nfs4_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data; - struct nfs_server *old = NFS_SB(sb); - - if (strcmp(server->hostname, old->hostname) != 0) - return 0; - if (strcmp(server->mnt_path, old->mnt_path) != 0) - return 0; - return 1; -} - -static void * -nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) -{ - void *p = NULL; - - if (!src->len) - return ERR_PTR(-EINVAL); - if (src->len < maxlen) - maxlen = src->len; - if (dst == NULL) { - p = dst = kmalloc(maxlen + 1, GFP_KERNEL); - if (p == NULL) - return ERR_PTR(-ENOMEM); - } - if (copy_from_user(dst, src->data, maxlen)) { - kfree(p); - return ERR_PTR(-EFAULT); - } - dst[maxlen] = '\0'; - return dst; -} - -static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - int error; - struct nfs_server *server; - struct super_block *s; - struct nfs4_mount_data *data = raw_data; - void *p; - - if (data == NULL) { - dprintk("%s: missing data argument\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { - dprintk("%s: bad mount version\n", __FUNCTION__); - return ERR_PTR(-EINVAL); - } - - server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); - if (!server) - return ERR_PTR(-ENOMEM); - /* Zero out the NFS state stuff */ - init_nfsv4_state(server); - server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); - - p = nfs_copy_user_string(NULL, &data->hostname, 256); - if (IS_ERR(p)) - goto out_err; - server->hostname = p; - - p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); - if (IS_ERR(p)) - goto out_err; - server->mnt_path = p; - - p = nfs_copy_user_string(server->ip_addr, 
&data->client_addr, - sizeof(server->ip_addr) - 1); - if (IS_ERR(p)) - goto out_err; - - /* We now require that the mount process passes the remote address */ - if (data->host_addrlen != sizeof(server->addr)) { - s = ERR_PTR(-EINVAL); - goto out_free; - } - if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - s = ERR_PTR(-EFAULT); - goto out_free; - } - if (server->addr.sin_family != AF_INET || - server->addr.sin_addr.s_addr == INADDR_ANY) { - dprintk("%s: mount program didn't pass remote IP address!\n", - __FUNCTION__); - s = ERR_PTR(-EINVAL); - goto out_free; - } - - /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); - goto out_free; - } - - s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) - goto out_free; - - s->s_flags = flags; - - error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); - if (error) { - up_write(&s->s_umount); - deactivate_super(s); - return ERR_PTR(error); - } - s->s_flags |= MS_ACTIVE; - return s; -out_err: - s = (struct super_block *)p; -out_free: - kfree(server->mnt_path); - kfree(server->hostname); - kfree(server); - return s; -} - -static void nfs4_kill_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - nfs_return_all_delegations(sb); - kill_anon_super(sb); - - nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL && !IS_ERR(server->client)) - rpc_shutdown_client(server->client); - - destroy_nfsv4_state(server); - - rpciod_down(); - - nfs_free_iostats(server->io_stats); - kfree(server->hostname); - kfree(server); - nfs_release_automount_timer(); -} - -static struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .get_sb = nfs4_get_sb, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static const int nfs_set_port_min = 0; 
-static const int nfs_set_port_max = 65535; -static int param_set_port(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) - return -EINVAL; - *((int *)kp->arg) = num; - return 0; -} - -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); - -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); - -/* Constructs the SERVER-side path */ -static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) -{ - return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); -} - -static inline char *nfs4_dup_path(const struct dentry *dentry) -{ - char *page = (char *) __get_free_page(GFP_USER); - char *path; - - path = nfs4_path(dentry, page, PAGE_SIZE); - if (!IS_ERR(path)) { - int len = PAGE_SIZE + page - path; - char *tmp = path; - - path = kmalloc(len, GFP_KERNEL); - if (path) - memcpy(path, tmp, len); - else - path = ERR_PTR(-ENOMEM); - } - free_page((unsigned long)page); - return path; -} - -static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - const struct dentry *dentry = data->dentry; - struct nfs4_client *clp = server->nfs4_state; - struct super_block *sb; - - server->fsid = data->fattr->fsid; - nfs_copy_fh(&server->fh, data->fh); - server->mnt_path = nfs4_dup_path(dentry); - if (IS_ERR(server->mnt_path)) { - sb = (struct super_block *)server->mnt_path; - goto err; - } - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto 
free_path; - nfs4_server_capabilities(server, &server->fh); - - down_write(&clp->cl_sem); - atomic_inc(&clp->cl_count); - list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); - up_write(&clp->cl_sem); - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} - -static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server); -} - -static struct file_system_type clone_nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .get_sb = nfs_clone_nfs4_sb, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, struct nfs_clone_mount *mountdata) -{ - struct vfsmount *mnt = NULL; - switch (server->rpc_ops->version) { - case 2: - case 3: - mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); - break; - case 4: - mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); - } - return mnt; -} - -#define nfs4_init_once(nfsi) \ - do { \ - INIT_LIST_HEAD(&(nfsi)->open_states); \ - nfsi->delegation = NULL; \ - nfsi->delegation_state = 0; \ - init_rwsem(&nfsi->rwsem); \ - } while(0) - -static inline int register_nfs4fs(void) -{ - int ret; - - ret = nfs_register_sysctl(); - if (ret != 0) - return ret; - ret = register_filesystem(&nfs4_fs_type); - if (ret != 0) - nfs_unregister_sysctl(); - return ret; -} - -static inline void unregister_nfs4fs(void) -{ - unregister_filesystem(&nfs4_fs_type); - nfs_unregister_sysctl(); -} -#else -#define nfs4_fill_sb(a,b) ERR_PTR(-EINVAL) -#define nfs4_fill_super(a,b) ERR_PTR(-EINVAL) -#define nfs4_init_once(nfsi) \ - do { } while (0) -#define register_nfs4fs() (0) -#define unregister_nfs4fs() -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char 
*devname, struct nfs_clone_mount *mountdata) -{ - return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); -} #endif -static inline char *nfs_devname(const struct vfsmount *mnt_parent, - const struct dentry *dentry, - char *buffer, ssize_t buflen) -{ - return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); -} - -/** - * nfs_do_submount - set up mountpoint when crossing a filesystem boundary - * @mnt_parent - mountpoint of parent directory - * @dentry - parent directory - * @fh - filehandle for new root dentry - * @fattr - attributes for new root inode - * - */ -struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, - const struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr) -{ - struct nfs_clone_mount mountdata = { - .sb = mnt_parent->mnt_sb, - .dentry = dentry, - .fh = fh, - .fattr = fattr, - }; - struct vfsmount *mnt = ERR_PTR(-ENOMEM); - char *page = (char *) __get_free_page(GFP_USER); - char *devname; - - dprintk("%s: submounting on %s/%s\n", __FUNCTION__, - dentry->d_parent->d_name.name, - dentry->d_name.name); - if (page == NULL) - goto out; - devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); - mnt = (struct vfsmount *)devname; - if (IS_ERR(devname)) - goto free_page; - mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); -free_page: - free_page((unsigned long)page); -out: - dprintk("%s: done\n", __FUNCTION__); - return mnt; -} - -#ifdef CONFIG_NFS_V4 -/* Check if fs_root is valid */ -static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, char *buffer, ssize_t buflen) -{ - char *end = buffer + buflen; - int n; - - *--end = '\0'; - buflen--; - - n = pathname->ncomponents; - while (--n >= 0) { - struct nfs4_string *component = &pathname->components[n]; - buflen -= component->len + 1; - if (buflen < 0) - goto Elong; - end -= component->len; - memcpy(end, component->data, component->len); - *--end = '/'; - } - return end; -Elong: - return ERR_PTR(-ENAMETOOLONG); -} 
- -/* Check if the string represents a "valid" IPv4 address */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} - -static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) -{ - struct super_block *sb = ERR_PTR(-ENOMEM); - int len; - - len = strlen(data->mnt_path) + 1; - server->mnt_path = kmalloc(len, GFP_KERNEL); - if (server->mnt_path == NULL) - goto err; - memcpy(server->mnt_path, data->mnt_path, len); - memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); - - sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); - if (IS_ERR(sb) || sb->s_root) - goto free_path; - return sb; -free_path: - kfree(server->mnt_path); -err: - server->mnt_path = NULL; - return sb; -} - -static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) -{ - struct nfs_server *server = NFS_SB(sb); - struct rpc_timeout timeparms; - int proto, timeo, retrans; - void *err; - - proto = IPPROTO_TCP; - /* Since we are following a referral and there may be alternatives, - set the timeouts and retries to low values */ - timeo = 2; - retrans = 1; - nfs_init_timeout_values(&timeparms, proto, timeo, retrans); - - server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); - if (IS_ERR((err = server->client))) - goto out_err; - - sb->s_time_gran = 1; - sb->s_op = &nfs4_sops; - err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); - if (!IS_ERR(err)) - return server; -out_err: - return (struct nfs_server *)err; -} - -static struct super_block *nfs_referral_nfs4_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - return nfs_clone_generic_sb(data, nfs4_referral_sb, 
nfs4_referral_server); -} - -static struct file_system_type nfs_referral_nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .get_sb = nfs_referral_nfs4_sb, - .kill_sb = nfs4_kill_super, - .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, -}; - -/** - * nfs_follow_referral - set up mountpoint when hitting a referral on moved error - * @mnt_parent - mountpoint of parent directory - * @dentry - parent directory - * @fspath - fs path returned in fs_locations - * @mntpath - mount path to new server - * @hostname - hostname of new server - * @addr - host addr of new server - * - */ -struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, - const struct dentry *dentry, struct nfs4_fs_locations *locations) -{ - struct vfsmount *mnt = ERR_PTR(-ENOENT); - struct nfs_clone_mount mountdata = { - .sb = mnt_parent->mnt_sb, - .dentry = dentry, - .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, - }; - char *page, *page2; - char *path, *fs_path; - char *devname; - int loc, s; - - if (locations == NULL || locations->nlocations <= 0) - goto out; - - dprintk("%s: referral at %s/%s\n", __FUNCTION__, - dentry->d_parent->d_name.name, dentry->d_name.name); - - /* Ensure fs path is a prefix of current dentry path */ - page = (char *) __get_free_page(GFP_USER); - if (page == NULL) - goto out; - page2 = (char *) __get_free_page(GFP_USER); - if (page2 == NULL) - goto out; - - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) - goto out_free; - - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) - goto out_free; - - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { - dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); - goto out_free; - } - - devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); - if (IS_ERR(devname)) { - mnt = (struct vfsmount *)devname; - goto out_free; - } - - loc = 0; - while (loc < locations->nlocations && IS_ERR(mnt)) 
{ - struct nfs4_fs_location *location = &locations->locations[loc]; - char *mnt_path; - - if (location == NULL || location->nservers <= 0 || - location->rootpath.ncomponents == 0) { - loc++; - continue; - } - - mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); - if (IS_ERR(mnt_path)) { - loc++; - continue; - } - mountdata.mnt_path = mnt_path; - - s = 0; - while (s < location->nservers) { - struct sockaddr_in addr = {}; - - if (location->servers[s].len <= 0 || - valid_ipaddr4(location->servers[s].data) < 0) { - s++; - continue; - } - - mountdata.hostname = location->servers[s].data; - addr.sin_addr.s_addr = in_aton(mountdata.hostname); - addr.sin_family = AF_INET; - addr.sin_port = htons(NFS_PORT); - mountdata.addr = &addr; - - mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); - if (!IS_ERR(mnt)) { - break; - } - s++; - } - loc++; - } - -out_free: - free_page((unsigned long)page); - free_page((unsigned long)page2); -out: - dprintk("%s: done\n", __FUNCTION__); - return mnt; -} - -/* - * nfs_do_refmount - handle crossing a referral on server - * @dentry - dentry of referral - * @nd - nameidata info - * - */ -struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) -{ - struct vfsmount *mnt = ERR_PTR(-ENOENT); - struct dentry *parent; - struct nfs4_fs_locations *fs_locations = NULL; - struct page *page; - int err; - - /* BUG_ON(IS_ROOT(dentry)); */ - dprintk("%s: enter\n", __FUNCTION__); - - page = alloc_page(GFP_KERNEL); - if (page == NULL) - goto out; - - fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (fs_locations == NULL) - goto out_free; - - /* Get locations */ - parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); - err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); - dput(parent); - if (err != 0 || fs_locations->nlocations <= 0 || - 
fs_locations->fs_path.ncomponents <= 0) - goto out_free; - - mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); -out_free: - __free_page(page); - kfree(fs_locations); -out: - dprintk("%s: done\n", __FUNCTION__); - return mnt; -} -#else -struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) -{ - return ERR_PTR(-ENOENT); -} -#endif - -extern int nfs_init_nfspagecache(void); -extern void nfs_destroy_nfspagecache(void); -extern int nfs_init_readpagecache(void); -extern void nfs_destroy_readpagecache(void); -extern int nfs_init_writepagecache(void); -extern void nfs_destroy_writepagecache(void); -#ifdef CONFIG_NFS_DIRECTIO -extern int nfs_init_directcache(void); -extern void nfs_destroy_directcache(void); -#endif - -static kmem_cache_t * nfs_inode_cachep; - -static struct inode *nfs_alloc_inode(struct super_block *sb) +struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); @@ -2837,11 +1084,19 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) return &nfsi->vfs_inode; } -static void nfs_destroy_inode(struct inode *inode) +void nfs_destroy_inode(struct inode *inode) { kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +#define nfs4_init_once(nfsi) \ + do { \ + INIT_LIST_HEAD(&(nfsi)->open_states); \ + nfsi->delegation = NULL; \ + nfsi->delegation_state = 0; \ + init_rwsem(&nfsi->rwsem); \ + } while(0) + static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; @@ -2862,7 +1117,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) } } -static int nfs_init_inodecache(void) +static int __init nfs_init_inodecache(void) { nfs_inode_cachep = kmem_cache_create("nfs_inode_cache", sizeof(struct nfs_inode), @@ -2875,7 +1130,7 @@ static int nfs_init_inodecache(void) return 0; } -static void nfs_destroy_inodecache(void) 
+static void __exit nfs_destroy_inodecache(void) { if (kmem_cache_destroy(nfs_inode_cachep)) printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n"); @@ -2904,29 +1159,22 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_NFS_DIRECTIO err = nfs_init_directcache(); if (err) goto out0; -#endif #ifdef CONFIG_PROC_FS rpc_proc_register(&nfs_rpcstat); #endif - err = register_filesystem(&nfs_fs_type); - if (err) - goto out; - if ((err = register_nfs4fs()) != 0) + if ((err = register_nfs_fs()) != 0) goto out; return 0; out: #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif -#ifdef CONFIG_NFS_DIRECTIO nfs_destroy_directcache(); out0: -#endif nfs_destroy_writepagecache(); out1: nfs_destroy_readpagecache(); @@ -2940,9 +1188,7 @@ out4: static void __exit exit_nfs_fs(void) { -#ifdef CONFIG_NFS_DIRECTIO nfs_destroy_directcache(); -#endif nfs_destroy_writepagecache(); nfs_destroy_readpagecache(); nfs_destroy_inodecache(); @@ -2950,8 +1196,7 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif - unregister_filesystem(&nfs_fs_type); - unregister_nfs4fs(); + unregister_nfs_fs(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h new file mode 100644 index 00000000000..5e51c4535b6 --- /dev/null +++ b/fs/nfs/internal.h @@ -0,0 +1,179 @@ +/* + * NFS internal definitions + */ + +#include + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; + char *hostname; + char *mnt_path; + struct sockaddr_in *addr; + rpc_authflavor_t authflavor; +}; + +/* namespace-nfs4.c */ +#ifdef CONFIG_NFS_V4 +extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); +#else +static inline +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + return ERR_PTR(-ENOENT); +} +#endif + +/* callback_xdr.c */ +extern struct 
svc_version nfs4_callback_version1; + +/* pagelist.c */ +extern int __init nfs_init_nfspagecache(void); +extern void __exit nfs_destroy_nfspagecache(void); +extern int __init nfs_init_readpagecache(void); +extern void __exit nfs_destroy_readpagecache(void); +extern int __init nfs_init_writepagecache(void); +extern void __exit nfs_destroy_writepagecache(void); + +#ifdef CONFIG_NFS_DIRECTIO +extern int __init nfs_init_directcache(void); +extern void __exit nfs_destroy_directcache(void); +#else +#define nfs_init_directcache() (0) +#define nfs_destroy_directcache() do {} while(0) +#endif + +/* nfs2xdr.c */ +extern struct rpc_procinfo nfs_procedures[]; +extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); + +/* nfs3xdr.c */ +extern struct rpc_procinfo nfs3_procedures[]; +extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); + +/* nfs4xdr.c */ +extern int nfs_stat_to_errno(int); +extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); + +/* nfs4proc.c */ +extern struct rpc_procinfo nfs4_procedures[]; + +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs4_fs_locations *fs_locations, + struct page *page); + +/* inode.c */ +extern struct inode *nfs_alloc_inode(struct super_block *sb); +extern void nfs_destroy_inode(struct inode *); +extern int nfs_write_inode(struct inode *,int); +extern void nfs_clear_inode(struct inode *); +#ifdef CONFIG_NFS_V4 +extern void nfs4_clear_inode(struct inode *); +#endif + +/* super.c */ +extern struct file_system_type nfs_referral_nfs4_fs_type; +extern struct file_system_type clone_nfs_fs_type; +#ifdef CONFIG_NFS_V4 +extern struct file_system_type clone_nfs4_fs_type; +#endif +#ifdef CONFIG_PROC_FS +extern struct rpc_stat nfs_rpcstat; +#endif +extern int __init register_nfs_fs(void); +extern void __exit unregister_nfs_fs(void); + +/* namespace.c */ +extern char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen); + +/* + * 
Determine the mount path as a string + */ +static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +} + +/* + * Determine the device name as a string + */ +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/* + * Determine the actual block size (and log2 thereof) + */ +static inline +unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) +{ + /* make sure blocksize is a power of two */ + if ((bsize & (bsize - 1)) || nrbitsp) { + unsigned char nrbits; + + for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + ; + bsize = 1 << nrbits; + if (nrbitsp) + *nrbitsp = nrbits; + } + + return bsize; +} + +/* + * Calculate the number of 512byte blocks used. + */ +static inline unsigned long nfs_calc_block_size(u64 tsize) +{ + loff_t used = (tsize + 511) >> 9; + return (used > ULONG_MAX) ? 
ULONG_MAX : used; +} + +/* + * Compute and set NFS server blocksize + */ +static inline +unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) +{ + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; + + return nfs_block_bits(bsize, nrbitsp); +} + +/* + * Determine the maximum file size for a superblock + */ +static inline +void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + +/* + * Check if the string represents a "valid" IPv4 address + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 8ca44b7b25c..19b98ca468e 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -15,14 +15,63 @@ #include #include #include +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_VFS -LIST_HEAD(nfs_automount_list); static void nfs_expire_automounts(void *list); + +LIST_HEAD(nfs_automount_list); static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); int nfs_mountpoint_expiry_timeout = 500 * HZ; +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. 
+ */ +char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + /* * nfs_follow_mountpoint - handle crossing a mountpoint on the server * @dentry - dentry of mountpoint @@ -117,3 +166,64 @@ void nfs_release_automount_timer(void) flush_scheduled_work(); } } + +/* + * Clone a mountpoint of the appropriate type + */ +static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, + struct nfs_clone_mount *mountdata) +{ +#ifdef CONFIG_NFS_V4 + struct vfsmount *mnt = NULL; + switch (server->rpc_ops->version) { + case 2: + case 3: + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); + break; + case 4: + mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); + } + return mnt; +#else + return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); +#endif +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, 
+ .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + mnt = (struct vfsmount *)devname; + if (IS_ERR(devname)) + goto free_page; + mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); +free_page: + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index a7ed88f97a1..4a006f81666 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -27,8 +27,6 @@ #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ -extern int nfs_stat_to_errno(int stat); - /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cf186f0d2b3..7143b1f82ce 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -20,11 +20,10 @@ #include #include "iostat.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC -extern struct rpc_procinfo nfs3_procedures[]; - /* A wrapper to handle the EJUKEBOX error message */ static int nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) @@ -809,8 +808,6 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); - static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index f70eee2cac0..0250269e975 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -22,14 +22,13 @@ #include #include #include +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR /* Mapping from NFS error code to "errno" error code. 
*/ #define errno_NFSERR_IO EIO -extern int nfs_stat_to_errno(int); - /* * Declare the space requirements for NFS arguments and replies as * number of 32bit-words diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c new file mode 100644 index 00000000000..ea38d27b74e --- /dev/null +++ b/fs/nfs/nfs4namespace.c @@ -0,0 +1,201 @@ +/* + * linux/fs/nfs/nfs4namespace.c + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4 namespace + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* + * Convert an nfs4_pathname into a '/'-separated, NUL-terminated string. + * + * The string is built backwards from the end of "buffer", last component + * first, with a '/' in front of every component. Returns a pointer to the + * start of the string inside "buffer", or ERR_PTR(-ENAMETOOLONG) if the + * components do not fit in "buflen" bytes. + */ +static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, + char *buffer, ssize_t buflen) +{ + char *end = buffer + buflen; + int n; + + *--end = '\0'; + buflen--; + + n = pathname->ncomponents; + while (--n >= 0) { + struct nfs4_string *component = &pathname->components[n]; + buflen -= component->len + 1; + if (buflen < 0) + goto Elong; + end -= component->len; + memcpy(end, component->data, component->len); + *--end = '/'; + } + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + + +/** + * nfs_follow_referral - set up mountpoint when hitting a referral on moved error + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @locations - fs_locations data for the referral (fs path plus the list of + *              candidate locations); may be NULL or empty, in which case + *              ERR_PTR(-ENOENT) is returned + * + */ +static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs4_fs_locations *locations) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, + }; + char *page, *page2; + char *path, *fs_path; + char *devname; + int loc, s; + + if (locations == NULL || locations->nlocations <= 0) + goto out; + 
+ dprintk("%s: referral at %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, dentry->d_name.name); + + /* Ensure fs path is a prefix of current dentry path */ + page = (char *) __get_free_page(GFP_USER); + if (page == NULL) + goto out; + page2 = (char *) __get_free_page(GFP_USER); + if (page2 == NULL) + goto out_free_page; /* "page" is already allocated: release it */ + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (IS_ERR(path)) + goto out_free; + + fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); + if (IS_ERR(fs_path)) + goto out_free; + + if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path); + goto out_free; + } + + /* "path" is no longer needed, so "page" is reused for the device name */ + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (IS_ERR(devname)) { + mnt = (struct vfsmount *)devname; + goto out_free; + } + + /* Try each location, and each server within it, until one mount succeeds */ + loc = 0; + while (loc < locations->nlocations && IS_ERR(mnt)) { + struct nfs4_fs_location *location = &locations->locations[loc]; + char *mnt_path; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) { + loc++; + continue; + } + + mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); + if (IS_ERR(mnt_path)) { + loc++; + continue; + } + mountdata.mnt_path = mnt_path; + + s = 0; + while (s < location->nservers) { + struct sockaddr_in addr = {}; + + if (location->servers[s].len <= 0 || + valid_ipaddr4(location->servers[s].data) < 0) { + s++; + continue; + } + + mountdata.hostname = location->servers[s].data; + addr.sin_addr.s_addr = in_aton(mountdata.hostname); + addr.sin_family = AF_INET; + addr.sin_port = htons(NFS_PORT); + mountdata.addr = &addr; + + mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); + if (!IS_ERR(mnt)) { + break; + } + s++; + } + loc++; + } + +out_free: + free_page((unsigned long)page2); +out_free_page: + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + +/* + * nfs_do_refmount - handle crossing a referral on server + 
* @dentry - dentry of referral + * @nd - nameidata info + * + */ +struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + struct dentry *parent; + struct nfs4_fs_locations *fs_locations = NULL; + struct page *page; + int err; + + /* BUG_ON(IS_ROOT(dentry)); */ + dprintk("%s: enter\n", __FUNCTION__); + + page = alloc_page(GFP_KERNEL); + if (page == NULL) + goto out; + + fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (fs_locations == NULL) + goto out_free; + + /* Get locations */ + parent = dget_parent(dentry); + dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); + err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); + dput(parent); + if (err != 0 || fs_locations->nlocations <= 0 || + fs_locations->fs_path.ncomponents <= 0) + goto out_free; + + mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); +out_free: + __free_page(page); + kfree(fs_locations); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3300e35d74a..b4916b09219 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -65,8 +65,6 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *) static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); -extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); -extern struct rpc_procinfo nfs4_procedures[]; /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 656481c0daa..ef9429643eb 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -378,7 +378,7 @@ out: 
return res; } -int nfs_init_nfspagecache(void) +int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", sizeof(struct nfs_page), @@ -390,7 +390,7 @@ int nfs_init_nfspagecache(void) return 0; } -void nfs_destroy_nfspagecache(void) +void __exit nfs_destroy_nfspagecache(void) { if (kmem_cache_destroy(nfs_page_cachep)) printk(KERN_INFO "nfs_page: not all structures were freed\n"); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 9dd85cac2df..b3899ea3229 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -44,11 +44,10 @@ #include #include #include +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC -extern struct rpc_procinfo nfs_procedures[]; - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -611,8 +610,6 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); - static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (task->tk_status >= 0) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index fd9018c692b..41c2ffee24f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -694,7 +694,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, return ret; } -int nfs_init_readpagecache(void) +int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", sizeof(struct nfs_read_data), @@ -711,7 +711,7 @@ int nfs_init_readpagecache(void) return 0; } -void nfs_destroy_readpagecache(void) +void __exit nfs_destroy_readpagecache(void) { mempool_destroy(nfs_rdata_mempool); if (kmem_cache_destroy(nfs_rdata_cachep)) diff --git a/fs/nfs/super.c b/fs/nfs/super.c new file mode 100644 index 00000000000..4acd3ee9642 --- /dev/null +++ b/fs/nfs/super.c @@ -0,0 +1,1468 @@ +/* + * linux/fs/nfs/super.c + * + * Copyright (C) 1992 Rick Sladkey + * + * nfs superblock handling functions + * + * Modularised by Alan Cox , while hacking some + * experimental NFS changes. 
Modularisation taken straight from SYS5 fs. + * + * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. + * J.S.Peatfield@damtp.cam.ac.uk + * + * Split from inode.c by David Howells + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "nfs4_fs.h" +#include "callback.h" +#include "delegation.h" +#include "iostat.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +/* Maximum number of readahead requests + * FIXME: this should really be a sysctl so that users may tune it to suit + * their needs. People that do NFS over a slow network, might for + * instance want to reduce it to something closer to 1 for improved + * interactive response. + */ +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) + +/* + * RPC cruft for NFS + */ +static struct rpc_version * nfs_version[] = { + NULL, + NULL, + &nfs_version2, +#if defined(CONFIG_NFS_V3) + &nfs_version3, +#elif defined(CONFIG_NFS_V4) + NULL, +#endif +#if defined(CONFIG_NFS_V4) + &nfs_version4, +#endif +}; + +static struct rpc_program nfs_program = { + .name = "nfs", + .number = NFS_PROGRAM, + .nrvers = ARRAY_SIZE(nfs_version), + .version = nfs_version, + .stats = &nfs_rpcstat, + .pipe_dir_name = "/nfs", +}; + +struct rpc_stat nfs_rpcstat = { + .program = &nfs_program +}; + + +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = ARRAY_SIZE(nfsacl_version), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +static void nfs_umount_begin(struct vfsmount *, int); +static int nfs_statfs(struct super_block *, 
struct kstatfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); +static struct super_block *nfs_get_sb(struct file_system_type *, int, const char *, void *); +static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static void nfs_kill_super(struct super_block *); + +static struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_get_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = nfs_clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct super_operations nfs_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .statfs = nfs_statfs, + .clear_inode = nfs_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, +}; + +#ifdef CONFIG_NFS_V4 +static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static struct super_block *nfs_referral_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data); +static void nfs4_kill_super(struct super_block *sb); + +static struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type clone_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs_clone_nfs4_sb, + .kill_sb = 
nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +struct file_system_type nfs_referral_nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs_referral_nfs4_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, + .show_stats = nfs_show_stats, +}; +#endif + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; + +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); + +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + +/* + * Register the NFS filesystems + */ +int __init register_nfs_fs(void) +{ + int ret; + + ret = register_filesystem(&nfs_fs_type); + if (ret < 0) + goto error_0; + +#ifdef CONFIG_NFS_V4 + ret = nfs_register_sysctl(); + if (ret < 0) + goto error_1; + ret = register_filesystem(&nfs4_fs_type); + if (ret < 0) + goto error_2; +#endif + return 0; + +#ifdef CONFIG_NFS_V4 +error_2: + nfs_unregister_sysctl(); +error_1: + unregister_filesystem(&nfs_fs_type); +#endif +error_0: + return ret; +} + +/* + * 
Unregister the NFS filesystems + */ +void __exit unregister_nfs_fs(void) +{ +#ifdef CONFIG_NFS_V4 + unregister_filesystem(&nfs4_fs_type); + nfs_unregister_sysctl(); +#endif + unregister_filesystem(&nfs_fs_type); +} + +/* + * Deliver file system statistics to userspace + */ +static int nfs_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct nfs_server *server = NFS_SB(sb); + unsigned char blockbits; + unsigned long blockres; + struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); + struct nfs_fattr fattr; + struct nfs_fsstat res = { + .fattr = &fattr, + }; + int error; + + lock_kernel(); + + error = server->rpc_ops->statfs(server, rootfh, &res); + buf->f_type = NFS_SUPER_MAGIC; + if (error < 0) + goto out_err; + + /* + * Current versions of glibc do not correctly handle the + * case where f_frsize != f_bsize. Eventually we want to + * report the value of wtmult in this field. + */ + buf->f_frsize = sb->s_blocksize; + + /* + * On most *nix systems, f_blocks, f_bfree, and f_bavail + * are reported in units of f_frsize. Linux hasn't had + * an f_frsize field in its statfs struct until recently, + * thus historically Linux's sys_statfs reports these + * fields in units of f_bsize. 
+ */ + buf->f_bsize = sb->s_blocksize; + blockbits = sb->s_blocksize_bits; + blockres = (1 << blockbits) - 1; + buf->f_blocks = (res.tbytes + blockres) >> blockbits; + buf->f_bfree = (res.fbytes + blockres) >> blockbits; + buf->f_bavail = (res.abytes + blockres) >> blockbits; + + buf->f_files = res.tfiles; + buf->f_ffree = res.afiles; + + buf->f_namelen = server->namelen; + out: + unlock_kernel(); + return 0; + + out_err: + dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); + buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; + goto out; + +} + +/* + * Describe the mount options in force on this server representation + */ +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) +{ + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, + { 0, NULL, NULL } + }; + struct proc_nfs_info *nfs_infop; + char buf[12]; + char *proto; + + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ || showdefaults) + seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ || showdefaults) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ || showdefaults) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ || showdefaults) + seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + switch (nfss->client->cl_xprt->prot) { + case IPPROTO_TCP: + proto = "tcp"; + break; + case IPPROTO_UDP: + proto = "udp"; + break; 
+ default: + snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); + proto = buf; + } + seq_printf(m, ",proto=%s", proto); + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); +} + +/* + * Describe the mount options on this VFS mountpoint + */ +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + + seq_puts(m, ",addr="); + seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +/* + * Present statistical information for this VFS mountpoint + */ +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? 
",nodiratime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for_each_possible_cpu(cpu) { + struct nfs_iostats *stats; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + seq_printf(m, "\n"); + + rpc_print_iostats(m, nfss->client); + + return 0; +} + +/* + * Begin unmount by attempting to remove all automounted mountpoints we added + * in response to traversals + */ +static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) +{ + struct nfs_server *server; + struct rpc_clnt *rpc; + + shrink_submounts(vfsmnt, &nfs_automount_list); + if (!(flags & MNT_FORCE)) + return; + /* -EIO all pending I/O */ + server = NFS_SB(vfsmnt->mnt_sb); + rpc = server->client; + if (!IS_ERR(rpc)) + 
rpc_killall_tasks(rpc); + rpc = server->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); +} + +/* + * Obtain the root inode of the file system. + */ +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) +{ + struct nfs_server *server = NFS_SB(sb); + int error; + + error = server->rpc_ops->getroot(server, rootfh, fsinfo); + if (error < 0) { + dprintk("nfs_get_root: getattr error = %d\n", -error); + return ERR_PTR(error); + } + + server->fsid = fsinfo->fattr->fsid; + return nfs_fhget(sb, rootfh, fsinfo->fattr); +} + +/* + * Do NFS version-independent mount processing, and sanity checking + */ +static int +nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) +{ + struct nfs_server *server; + struct inode *root_inode; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { + .fattr = &fattr, + }; + struct nfs_pathconf pathinfo = { + .fattr = &fattr, + }; + int no_root_error = 0; + unsigned long max_rpc_payload; + + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + server = NFS_SB(sb); + + sb->s_magic = NFS_SUPER_MAGIC; + + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + return -ENOMEM; + + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); + /* Did getting the root inode fail? 
*/ + if (IS_ERR(root_inode)) { + no_root_error = PTR_ERR(root_inode); + goto out_no_root; + } + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) { + no_root_error = -ENOMEM; + goto out_no_root; + } + sb->s_root->d_op = server->rpc_ops->dentry_ops; + + /* mount time stamp, in seconds */ + server->mount_time = jiffies; + + /* Get some general file system info */ + if (server->namelen == 0 && + server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; + /* Work out a lot of parameters */ + if (server->rsize == 0) + server->rsize = nfs_block_size(fsinfo.rtpref, NULL); + if (server->wsize == 0) + server->wsize = nfs_block_size(fsinfo.wtpref, NULL); + + if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) + server->rsize = nfs_block_size(fsinfo.rtmax, NULL); + if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) + server->wsize = nfs_block_size(fsinfo.wtmax, NULL); + + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; + server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); + + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); + if (server->dtsize > PAGE_CACHE_SIZE) + server->dtsize = PAGE_CACHE_SIZE; + if (server->dtsize > server->rsize) + server->dtsize = server->rsize; + + if (server->flags & NFS_MOUNT_NOAC) { + server->acregmin = server->acregmax = 0; + server->acdirmin = server->acdirmax = 0; + sb->s_flags 
|= MS_SYNCHRONOUS; + } + server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; + + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + + server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; + server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; + + /* We're airborne Set socket buffersize */ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + return 0; + /* Yargs. It didn't work out. */ +out_no_root: + dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error); + if (!IS_ERR(root_inode)) + iput(root_inode); + return no_root_error; +} + +/* + * Initialise the timeout values for a connection + */ +static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) +{ + to->to_initval = timeo * HZ / 10; + to->to_retries = retrans; + if (!to->to_retries) + to->to_retries = 2; + + switch (proto) { + case IPPROTO_TCP: + if (!to->to_initval) + to->to_initval = 60 * HZ; + if (to->to_initval > NFS_MAX_TCP_TIMEOUT) + to->to_initval = NFS_MAX_TCP_TIMEOUT; + to->to_increment = to->to_initval; + to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); + to->to_exponential = 0; + break; + case IPPROTO_UDP: + default: + if (!to->to_initval) + to->to_initval = 11 * HZ / 10; + if (to->to_initval > NFS_MAX_UDP_TIMEOUT) + to->to_initval = NFS_MAX_UDP_TIMEOUT; + to->to_maxval = NFS_MAX_UDP_TIMEOUT; + to->to_exponential = 1; + break; + } +} + +/* + * Create an RPC client handle. + */ +static struct rpc_clnt * +nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) +{ + struct rpc_timeout timeparms; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; + + nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); + + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + + /* create transport and client */ + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (IS_ERR(xprt)) { + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); + return (struct rpc_clnt *)xprt; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, data->pseudoflavor); + if (IS_ERR(clnt)) { + /* Report the client's error code: xprt is a valid pointer here */ + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); + goto out_fail; + } + + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + + return clnt; + +out_fail: + return clnt; +} + +/* + * Clone a server record + * + * Returns the nfs_server attached to "sb" on success, or an ERR_PTR() value + * on failure. + */ +static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct nfs_server *parent = NFS_SB(data->sb); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + void *err = ERR_PTR(-ENOMEM); + + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto out; + + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out; + + if (!IS_ERR(parent->client_sys)) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out; + } + if (!IS_ERR(parent->client_acl)) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out; + } + /* + * "err" now holds the last successfully cloned client pointer. + * Reset it so that the failure paths below return a real error + * code instead of a valid-looking pointer. + */ + err = ERR_PTR(-ENOMEM); + root_inode = nfs_fhget(sb, data->fh, data->fattr); + /* NOTE(review): nfs_sb_init() treats this result as an ERR_PTR; handle both forms */ + if (IS_ERR(root_inode)) { + err = root_inode; + goto out; + } + if (!root_inode) + goto out; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto 
out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return server; +out_put_root: + iput(root_inode); +out: + return err; +} + +/* + * Copy an existing superblock and attach revised data + */ +static struct super_block *nfs_clone_generic_sb(struct nfs_clone_mount *data, + struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *), + struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *)) +{ + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + void *err = ERR_PTR(-ENOMEM); + char *hostname; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + hostname = (data->hostname != NULL) ? data->hostname : parent->hostname; + len = strlen(hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, hostname, len); + if (rpciod_up() != 0) + goto free_hostname; + + sb = fill_sb(server, data); + if (IS_ERR((err = sb)) || sb->s_root) + goto kill_rpciod; + + server = fill_server(sb, data); + if (IS_ERR((err = server))) + goto out_deactivate; + return sb; +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return (struct super_block *)err; +kill_rpciod: + rpciod_down(); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return (struct super_block *)err; +} + +/* + * Set up an NFS2/3 superblock + * + * The way this works is that the mount process passes a structure + * in the data argument which contains the server's IP address + * and the root file handle obtained from the server's mount + * daemon. 
We stash these away in the private superblock fields. + */ +static int +nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) +{ + struct nfs_server *server; + rpc_authflavor_t authflavor; + + server = NFS_SB(sb); + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + if (data->bsize) + sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (data->rsize) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags = data->flags & NFS_MOUNT_FLAGMASK; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + /* Start lockd here, before we might error out */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + + server->namelen = data->namlen; + server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); + if (!server->hostname) + return -ENOMEM; + strcpy(server->hostname, data->hostname); + + /* Check NFS protocol revision and initialize RPC op vector + * and file handle pool. 
*/ +#ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { + server->rpc_ops = &nfs_v3_clientops; + server->caps |= NFS_CAP_READDIRPLUS; + } else { + server->rpc_ops = &nfs_v2_clientops; + } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif + + /* Fill in pseudoflavor for mount version < 5 */ + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; + authflavor = data->pseudoflavor; /* save for sb_init() */ + /* XXX maybe we want to add a server->pseudoflavor field */ + + /* Create RPC client handles */ + server->client = nfs_create_client(server, data); + if (IS_ERR(server->client)) + return PTR_ERR(server->client); + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + + server->client_sys = rpc_clone_client(server->client); + if (IS_ERR(server->client_sys)) + return PTR_ERR(server->client_sys); + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } + if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. 
+ */ + sb->s_flags |= MS_POSIXACL; + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + sb->s_time_gran = 1; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + + sb->s_op = &nfs_sops; + return nfs_sb_init(sb, authflavor); +} + +static int nfs_set_super(struct super_block *s, void *data) +{ + s->s_fs_info = data; + return set_anon_super(s, data); +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) + return 0; + if (old->addr.sin_port != server->addr.sin_port) + return 0; + return !nfs_compare_fh(&old->fh, &server->fh); +} + +static struct super_block *nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + int error; + struct nfs_server *server = NULL; + struct super_block *s; + struct nfs_fh *root; + struct nfs_mount_data *data = raw_data; + + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; + } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, 
return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + + s = ERR_PTR(-ENOMEM); + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + goto out_err; + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + + root = &server->fh; + if (data->flags & NFS_MOUNT_VER3) + root->size = data->root.size; + else + root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); + if (root->size > sizeof(root->data)) { + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; + } + memcpy(root->data, data->root.data, root->size); + + /* We now require that the mount process passes the remote address */ + memcpy(&server->addr, &data->addr, sizeof(server->addr)); + if (server->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; + } + + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; + } + + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; + + s->s_flags = flags; + + error = nfs_fill_super(s, data, flags & MS_SILENT ? 
1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; +} + +static void nfs_kill_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + kill_anon_super(s); + + if (!IS_ERR(server->client)) + rpc_shutdown_client(server->client); + if (!IS_ERR(server->client_sys)) + rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ + + rpciod_down(); /* release rpciod */ + + nfs_free_iostats(server->io_stats); + kfree(server->hostname); + kfree(server); + nfs_release_automount_timer(); +} + +static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb; + + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; +} + +static struct super_block *nfs_clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server); +} + +#ifdef CONFIG_NFS_V4 +static struct rpc_clnt *nfs4_create_client(struct nfs_server *server, + struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor) +{ + struct nfs4_client *clp; + struct rpc_xprt *xprt = NULL; + struct rpc_clnt *clnt = NULL; + int err = -EIO; + + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); + return ERR_PTR(err); + } + + /* Now create transport and client */ + down_write(&clp->cl_sem); + if (IS_ERR(clp->cl_rpcclient)) { + xprt = 
xprt_create_proto(proto, &server->addr, timeparms); + if (IS_ERR(xprt)) { + up_write(&clp->cl_sem); + err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + /* Bind to a reserved port! */ + xprt->resvport = 1; + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, flavor); + if (IS_ERR(clnt)) { + up_write(&clp->cl_sem); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; + clp->cl_rpcclient = clnt; + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); + } + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); + if (!IS_ERR(clnt)) + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + + if (IS_ERR(clnt)) { + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + return clnt; + } + + if (server->nfs4_state->cl_idmap == NULL) { + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + + if (clnt->cl_auth->au_flavor != flavor) { + struct rpc_auth *auth; + + auth = rpcauth_create(flavor, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return (struct rpc_clnt *)auth; + } + } + return clnt; + + out_fail: + if (clp) + nfs4_put_client(clp); + return ERR_PTR(err); +} + +/* + * Set up an NFS4 superblock + */ +static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) +{ + struct nfs_server *server; + struct rpc_timeout timeparms; + rpc_authflavor_t authflavour; + int err = -EIO; + + sb->s_blocksize_bits = 0; + sb->s_blocksize = 0; + server = NFS_SB(sb); + if (data->rsize != 0) + server->rsize = nfs_block_size(data->rsize, NULL); + if (data->wsize != 0) + server->wsize = nfs_block_size(data->wsize, NULL); + server->flags 
= data->flags & NFS_MOUNT_FLAGMASK; + server->caps = NFS_CAP_ATOMIC_OPEN; + + server->acregmin = data->acregmin*HZ; + server->acregmax = data->acregmax*HZ; + server->acdirmin = data->acdirmin*HZ; + server->acdirmax = data->acdirmax*HZ; + + server->rpc_ops = &nfs_v4_clientops; + + nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); + + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + + /* Now create transport and client */ + authflavour = RPC_AUTH_UNIX; + if (data->auth_flavourlen != 0) { + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } + if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { + err = -EFAULT; + goto out_fail; + } + } + + server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); + if (IS_ERR(server->client)) { + err = PTR_ERR(server->client); + dprintk("%s: cannot create RPC client. 
Error = %d\n", + __FUNCTION__, err); + goto out_fail; + } + + sb->s_time_gran = 1; + + sb->s_op = &nfs4_sops; + err = nfs_sb_init(sb, authflavour); + + out_fail: + return err; +} + +static int nfs4_compare_super(struct super_block *sb, void *data) +{ + struct nfs_server *server = data; + struct nfs_server *old = NFS_SB(sb); + + if (strcmp(server->hostname, old->hostname) != 0) + return 0; + if (strcmp(server->mnt_path, old->mnt_path) != 0) + return 0; + return 1; +} + +static void * +nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) +{ + void *p = NULL; + + if (!src->len) + return ERR_PTR(-EINVAL); + if (src->len < maxlen) + maxlen = src->len; + if (dst == NULL) { + p = dst = kmalloc(maxlen + 1, GFP_KERNEL); + if (p == NULL) + return ERR_PTR(-ENOMEM); + } + if (copy_from_user(dst, src->data, maxlen)) { + kfree(p); + return ERR_PTR(-EFAULT); + } + dst[maxlen] = '\0'; + return dst; +} + +static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + int error; + struct nfs_server *server; + struct super_block *s; + struct nfs4_mount_data *data = raw_data; + void *p; + + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + + server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (!server) + return ERR_PTR(-ENOMEM); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + + p = nfs_copy_user_string(NULL, &data->hostname, 256); + if (IS_ERR(p)) + goto out_err; + server->hostname = p; + + p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); + if (IS_ERR(p)) + goto out_err; + server->mnt_path = p; + + p = nfs_copy_user_string(server->ip_addr, &data->client_addr, + sizeof(server->ip_addr) - 
1); + if (IS_ERR(p)) + goto out_err; + + /* We now require that the mount process passes the remote address */ + if (data->host_addrlen != sizeof(server->addr)) { + s = ERR_PTR(-EINVAL); + goto out_free; + } + if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { + s = ERR_PTR(-EFAULT); + goto out_free; + } + if (server->addr.sin_family != AF_INET || + server->addr.sin_addr.s_addr == INADDR_ANY) { + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); + s = ERR_PTR(-EINVAL); + goto out_free; + } + + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); + + if (IS_ERR(s) || s->s_root) + goto out_free; + + s->s_flags = flags; + + error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return ERR_PTR(error); + } + s->s_flags |= MS_ACTIVE; + return s; +out_err: + s = (struct super_block *)p; +out_free: + kfree(server->mnt_path); + kfree(server->hostname); + kfree(server); + return s; +} + +static void nfs4_kill_super(struct super_block *sb) +{ + struct nfs_server *server = NFS_SB(sb); + + nfs_return_all_delegations(sb); + kill_anon_super(sb); + + nfs4_renewd_prepare_shutdown(server); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + + destroy_nfsv4_state(server); + + rpciod_down(); + + nfs_free_iostats(server->io_stats); + kfree(server->hostname); + kfree(server); + nfs_release_automount_timer(); +} + +/* + * Constructs the SERVER-side path + */ +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + 
path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + const struct dentry *dentry = data->dentry; + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static struct super_block *nfs_clone_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server); +} + +static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) +{ + struct super_block *sb = ERR_PTR(-ENOMEM); + int len; + + len = strlen(data->mnt_path) + 1; + server->mnt_path = kmalloc(len, GFP_KERNEL); + if (server->mnt_path == NULL) + goto err; + memcpy(server->mnt_path, data->mnt_path, len); + memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in)); + + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + +static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct 
nfs_clone_mount *data) +{ + struct nfs_server *server = NFS_SB(sb); + struct rpc_timeout timeparms; + int proto, timeo, retrans; + void *err; + + proto = IPPROTO_TCP; + /* Since we are following a referral and there may be alternatives, + set the timeouts and retries to low values */ + timeo = 2; + retrans = 1; + nfs_init_timeout_values(&timeparms, proto, timeo, retrans); + + server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor); + if (IS_ERR((err = server->client))) + goto out_err; + + sb->s_time_gran = 1; + sb->s_op = &nfs4_sops; + err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); + if (!IS_ERR(err)) + return server; +out_err: + return (struct nfs_server *)err; +} + +static struct super_block *nfs_referral_nfs4_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server); +} + +#endif diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e03abbd8302..b383fdd3a15 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1529,7 +1529,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, return ret; } -int nfs_init_writepagecache(void) +int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", sizeof(struct nfs_write_data), @@ -1551,7 +1551,7 @@ int nfs_init_writepagecache(void) return 0; } -void nfs_destroy_writepagecache(void) +void __exit nfs_destroy_writepagecache(void) { mempool_destroy(nfs_commit_mempool); mempool_destroy(nfs_wdata_mempool); -- cgit v1.2.3-18-g5258 From 81039f1f204a0fd2952112a240284e114f1a25e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:34 -0400 Subject: NFS: Display the chosen RPCSEC_GSS security flavour in /proc/mounts Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c 
b/fs/nfs/super.c index 4acd3ee9642..30f939bcb72 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -318,6 +318,34 @@ static int nfs_statfs(struct super_block *sb, struct kstatfs *buf) } +static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) +{ + static struct { + rpc_authflavor_t flavour; + const char *str; + } sec_flavours[] = { + { RPC_AUTH_NULL, "null" }, + { RPC_AUTH_UNIX, "sys" }, + { RPC_AUTH_GSS_KRB5, "krb5" }, + { RPC_AUTH_GSS_KRB5I, "krb5i" }, + { RPC_AUTH_GSS_KRB5P, "krb5p" }, + { RPC_AUTH_GSS_LKEY, "lkey" }, + { RPC_AUTH_GSS_LKEYI, "lkeyi" }, + { RPC_AUTH_GSS_LKEYP, "lkeyp" }, + { RPC_AUTH_GSS_SPKM, "spkm" }, + { RPC_AUTH_GSS_SPKMI, "spkmi" }, + { RPC_AUTH_GSS_SPKMP, "spkmp" }, + { -1, "unknown" } + }; + int i; + + for (i=0; sec_flavours[i].flavour != -1; i++) { + if (sec_flavours[i].flavour == flavour) + break; + } + return sec_flavours[i].str; +} + /* * Describe the mount options in force on this server representation */ @@ -371,6 +399,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } /* -- cgit v1.2.3-18-g5258 From 5046791417dcac1ba126b77b8062af15a2f0b8e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:40:24 -0400 Subject: NLM: sem to mutex conversion Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 729ac427d35..5242743c940 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -112,7 +112,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; - init_MUTEX(&host->h_sema); + mutex_init(&host->h_mutex); host->h_nextrebind = jiffies + 
NLM_HOST_REBIND; host->h_expires = jiffies + NLM_HOST_EXPIRE; atomic_set(&host->h_count, 1); @@ -172,7 +172,7 @@ nlm_bind_host(struct nlm_host *host) (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); /* Lock host handle */ - down(&host->h_sema); + mutex_lock(&host->h_mutex); /* If we've already created an RPC client, check whether * RPC rebind is required @@ -204,12 +204,12 @@ nlm_bind_host(struct nlm_host *host) host->h_rpcclnt = clnt; } - up(&host->h_sema); + mutex_unlock(&host->h_mutex); return clnt; forgetit: printk("lockd: couldn't create RPC handle for %s\n", host->h_name); - up(&host->h_sema); + mutex_unlock(&host->h_mutex); return NULL; } -- cgit v1.2.3-18-g5258 From 28df955a2ad484d602314b30183ea8496a9aa34a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:40:27 -0400 Subject: NLM: Fix reclaim races Currently it is possible for a task to remove its locks at the same time as the NLM recovery thread is trying to recover them. This quickly leads to an Oops. Protect the locks using an rw semaphore while they are being recovered. Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 39 +++++++++++++++++++++++++-------------- fs/lockd/clntproc.c | 14 +++++++++++++- fs/lockd/host.c | 1 + 3 files changed, 39 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index bce74446870..52774feab93 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -147,11 +147,10 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, * that we mark locks for reclaiming, and that we bump the pseudo NSM state. 
*/ -static inline -void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) +static void nlmclnt_prepare_reclaim(struct nlm_host *host) { + down_write(&host->h_rwsem); host->h_monitored = 0; - host->h_nsmstate = newstate; host->h_state++; host->h_nextrebind = 0; nlm_rebind_host(host); @@ -164,6 +163,13 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) dprintk("NLM: reclaiming locks for host %s", host->h_name); } +static void nlmclnt_finish_reclaim(struct nlm_host *host) +{ + host->h_reclaiming = 0; + up_write(&host->h_rwsem); + dprintk("NLM: done reclaiming locks for host %s", host->h_name); +} + /* * Reclaim all locks on server host. We do this by spawning a separate * reclaimer thread. @@ -171,12 +177,10 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate) void nlmclnt_recovery(struct nlm_host *host, u32 newstate) { - if (host->h_reclaiming++) { - if (host->h_nsmstate == newstate) - return; - nlmclnt_prepare_reclaim(host, newstate); - } else { - nlmclnt_prepare_reclaim(host, newstate); + if (host->h_nsmstate == newstate) + return; + host->h_nsmstate = newstate; + if (!host->h_reclaiming++) { nlm_get_host(host); __module_get(THIS_MODULE); if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) @@ -190,6 +194,7 @@ reclaimer(void *ptr) struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; struct file_lock *fl, *next; + u32 nsmstate; daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); @@ -199,19 +204,25 @@ reclaimer(void *ptr) lock_kernel(); lockd_up(); + nlmclnt_prepare_reclaim(host); /* First, reclaim all locks that have been marked. 
*/ restart: + nsmstate = host->h_nsmstate; list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { list_del_init(&fl->fl_u.nfs_fl.list); if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) == 0) - list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); - goto restart; + if (nlmclnt_reclaim(host, fl) != 0) + continue; + list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); + if (host->h_nsmstate != nsmstate) { + /* Argh! The server rebooted again! */ + list_splice_init(&host->h_granted, &host->h_reclaim); + goto restart; + } } - - host->h_reclaiming = 0; + nlmclnt_finish_reclaim(host); /* Now, wake up all processes that sleep on a blocked lock */ list_for_each_entry(block, &nlm_blocked, b_list) { diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index f96e38155b5..4db62098d3f 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -508,7 +508,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } block = nlmclnt_prepare_block(host, fl); +again: for(;;) { + /* Reboot protection */ + fl->fl_u.nfs_fl.state = host->h_state; status = nlmclnt_call(req, NLMPROC_LOCK); if (status < 0) goto out_unblock; @@ -531,10 +534,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } if (resp->status == NLM_LCK_GRANTED) { - fl->fl_u.nfs_fl.state = host->h_state; + down_read(&host->h_rwsem); + /* Check whether or not the server has rebooted */ + if (fl->fl_u.nfs_fl.state != host->h_state) { + up_read(&host->h_rwsem); + goto again; + } fl->fl_flags |= FL_SLEEP; /* Ensure the resulting lock will get added to granted list */ do_vfs_lock(fl); + up_read(&host->h_rwsem); } status = nlm_stat_to_errno(resp->status); out_unblock: @@ -596,6 +605,7 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) static int nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { + struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; int status; @@ -604,7 +614,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock 
*fl) * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either * case, we want to unlock. */ + down_read(&host->h_rwsem); do_vfs_lock(fl); + up_read(&host->h_rwsem); if (req->a_flags & RPC_TASK_ASYNC) return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 5242743c940..38b0e8a1aec 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -117,6 +117,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin, host->h_expires = jiffies + NLM_HOST_EXPIRE; atomic_set(&host->h_count, 1); init_waitqueue_head(&host->h_gracewait); + init_rwsem(&host->h_rwsem); host->h_state = 0; /* pseudo NSM state */ host->h_nsmstate = 0; /* real NSM state */ host->h_server = server; -- cgit v1.2.3-18-g5258 From b1c5921c5b715c207d7fe77cd7aaafbb322f09f5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:55:19 -0400 Subject: NFS: Separate functions for counting outstanding NFS direct I/Os Factor out the logic that increments and decrements the outstanding I/O count. This will be a commonly used bit of code in upcoming patches. Also make this an atomic_t again, since it will be very often manipulated outside dreq->spin lock. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 402005c35ab..d78c61a41ec 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -80,8 +80,8 @@ struct nfs_direct_req { unsigned int npages; /* count of pages */ /* completion state */ + atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ - int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -97,6 +97,16 @@ struct nfs_direct_req { static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static inline void get_dreq(struct nfs_direct_req *dreq) +{ + atomic_inc(&dreq->io_count); +} + +static inline int put_dreq(struct nfs_direct_req *dreq) +{ + return atomic_dec_and_test(&dreq->io_count); +} + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -180,7 +190,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) dreq->iocb = NULL; dreq->ctx = NULL; spin_lock_init(&dreq->lock); - dreq->outstanding = 0; + atomic_set(&dreq->io_count, 0); dreq->count = 0; dreq->error = 0; dreq->flags = 0; @@ -278,7 +288,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - dreq->outstanding++; + get_dreq(dreq); if (nbytes <= rsize) break; nbytes -= rsize; @@ -302,13 +312,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else dreq->error = task->tk_status; - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - 
nfs_direct_complete(dreq); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -432,7 +439,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) list_splice_init(&dreq->rewrite_list, &dreq->list); list_for_each(pos, &dreq->list) - dreq->outstanding++; + get_dreq(dreq); dreq->count = 0; nfs_direct_write_schedule(dreq, FLUSH_STABLE); @@ -564,7 +571,7 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize list_add(&data->pages, list); data->req = (struct nfs_page *) dreq; - dreq->outstanding++; + get_dreq(dreq); if (nbytes <= wsize) break; nbytes -= wsize; @@ -620,14 +627,8 @@ static void nfs_direct_write_release(void *calldata) struct nfs_write_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - spin_lock(&dreq->lock); - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - - nfs_direct_write_complete(dreq, data->inode); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { -- cgit v1.2.3-18-g5258 From fedb595c66e1fbd5acafe0d43b7e95c13c936d61 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:55:45 -0400 Subject: NFS: "open code" the NFS direct write rescheduler An NFSv3/v4 client must reschedule on-the-wire writes if the writes are UNSTABLE, and the server reboots before the client can complete a subsequent COMMIT request. To support direct asynchronous scatter-gather writes, the write rescheduler in fs/nfs/direct.c must not depend on the I/O parameters in the controlling nfs_direct_req structure. iovecs can be somewhat arbitrarily complex, so there could be an unbounded amount of information to save for a rarely encountered requirement. 
Refactor the direct write rescheduler so it uses information from each nfs_write_data structure to reschedule writes, instead of caching that information in the controlling nfs_direct_req structure. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d78c61a41ec..7101405713e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -94,8 +94,8 @@ struct nfs_direct_req { struct nfs_writeverf verf; /* unstable write verifier */ }; -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static const struct rpc_call_ops nfs_write_direct_ops; static inline void get_dreq(struct nfs_direct_req *dreq) { @@ -435,14 +435,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct list_head *pos; + struct inode *inode = dreq->inode; + struct list_head *p; + struct nfs_write_data *data; - list_splice_init(&dreq->rewrite_list, &dreq->list); - list_for_each(pos, &dreq->list) - get_dreq(dreq); dreq->count = 0; + get_dreq(dreq); + + list_for_each(p, &dreq->rewrite_list) { + data = list_entry(p, struct nfs_write_data, pages); + + get_dreq(dreq); + + /* + * Reset data->res. + */ + nfs_fattr_init(&data->fattr); + data->res.count = data->args.count; + memset(&data->verf, 0, sizeof(data->verf)); + + /* + * Reuse data->task; data->args should not have changed + * since the original request was sent. 
+ */ + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; + + /* + * We're called via an RPC callback, so BKL is already held. + */ + rpc_execute(&data->task); + + dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + } - nfs_direct_write_schedule(dreq, FLUSH_STABLE); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); } static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) @@ -612,8 +649,6 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) } } } - /* In case we have to resend */ - data->args.stable = NFS_FILE_SYNC; spin_unlock(&dreq->lock); } -- cgit v1.2.3-18-g5258 From 51a7bc6caec94bab256b272bffd24d00ea81c698 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:56:16 -0400 Subject: NFS: remove user_addr, user_count, and pos from nfs_direct_req Make the user_addr, user_count, and pos parameters explicit to the scheduler routines, and remove the fields from nfs_direct_req. The iovec API will be passing in a series of these, not just one set. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7101405713e..e4c9e03aff1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -73,9 +73,6 @@ struct nfs_direct_req { struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ - unsigned long user_addr; /* location of user's buffer */ - size_t user_count; /* total bytes to move */ - loff_t pos; /* starting offset in file */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ @@ -327,19 +324,17 @@ static const struct rpc_call_ops nfs_read_direct_ops = { * For each nfs_read_data struct that was allocated on the list, dispatch * an NFS READ operation */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; @@ -403,9 +398,6 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!dreq) return -ENOMEM; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; dreq->inode = inode; @@ -415,7 +407,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, 
&oldset); - nfs_direct_read_schedule(dreq); + nfs_direct_read_schedule(dreq, user_addr, count, pos); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); @@ -516,8 +508,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = dreq->pos; - data->args.count = dreq->user_count; + data->args.offset = 0; + data->args.count = 0; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -675,19 +667,17 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * For each nfs_write_data struct that was allocated on the list, dispatch * an NFS WRITE operation */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int curpage, pgbase; curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; @@ -756,9 +746,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; dreq->pages = pages; dreq->npages = nr_pages; dreq->inode = inode; @@ -771,7 +758,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, sync); + nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); -- 
cgit v1.2.3-18-g5258 From 9c93ab7dff5eb22027ab15010557bb73f9b44c99 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:56:31 -0400 Subject: NFS: refactor nfs_direct_free_user_pages Clean-up and fix a minor bug: the logic was dirtying page cache pages on both read and write operations. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e4c9e03aff1..4cb3446220b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -126,16 +126,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static void nfs_direct_dirty_pages(struct page **pages, int npages) { int i; for (i = 0; i < npages; i++) { struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) + if (!PageCompound(page)) set_page_dirty_lock(page); - page_cache_release(page); } - kfree(pages); +} + +static void nfs_direct_release_pages(struct page **pages, int npages) +{ + int i; + for (i = 0; i < npages; i++) + page_cache_release(pages[i]); } static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) @@ -162,7 +167,7 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz * end of a mapping; return EFAULT. 
*/ if (result >= 0) { - nfs_free_user_pages(*pages, result, 0); + nfs_direct_release_pages(*pages, result); result = -EFAULT; } else kfree(*pages); @@ -238,8 +243,6 @@ out: */ static void nfs_direct_complete(struct nfs_direct_req *dreq) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - if (dreq->iocb) { long res = (long) dreq->error; if (!res) @@ -311,8 +314,11 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) spin_unlock(&dreq->lock); - if (put_dreq(dreq)) + if (put_dreq(dreq)) { + nfs_direct_dirty_pages(dreq->pages, dreq->npages); + nfs_direct_release_pages(dreq->pages, dreq->npages); nfs_direct_complete(dreq); + } } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -422,6 +428,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) list_del(&data->pages); nfs_writedata_release(data); } + nfs_direct_release_pages(dreq->pages, dreq->npages); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -- cgit v1.2.3-18-g5258 From 06cf6f2ed0b19629700794727d86ed57b9c0583e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:56:49 -0400 Subject: NFS: Eliminate nfs_get_user_pages() Neil Brown observed that the kmalloc() in nfs_get_user_pages() is more likely to fail if the I/O is large enough to require the allocation of more than a single page to keep track of all the pinned pages in the user's buffer. Instead of tracking one large page array per dreq/iocb, track pages per nfs_read/write_data, just like the cached I/O path does. An array for pages is already allocated for us by nfs_readdata_alloc() (and the write and commit equivalents). This is also required for adding support for vectored I/O to the NFS direct I/O path. The original reason to pin the user buffer and allocate all the NFS data structures before trying to schedule I/O was to ensure all needed resources are allocated on the client before starting to send requests. 
This reduces the chance that resource exhaustion on the client will cause a short read or write. On the other hand, for an application making very large application I/O requests, this means that it will be nearly impossible for the application to make forward progress on a resource-limited client. Thus, moving the buffer pinning functionality into the I/O scheduling loops should be good for scalability. The next patch will do the same for NFS data structure allocation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 205 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 111 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4cb3446220b..b1630d53fbb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -73,8 +73,6 @@ struct nfs_direct_req { struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ - struct page ** pages; /* pages in our buffer */ - unsigned int npages; /* count of pages */ /* completion state */ atomic_t io_count; /* i/os we're waiting for */ @@ -104,6 +102,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +/* + * "size" is never larger than rsize or wsize. 
+ */ +static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) +{ + int page_count; + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; + BUG_ON(page_count < 0); + + return page_count; +} + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -143,40 +155,6 @@ static void nfs_direct_release_pages(struct page **pages, int npages) page_cache_release(pages[i]); } -static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) -{ - int result = -ENOMEM; - unsigned long page_count; - size_t array_size; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - - array_size = (page_count * sizeof(struct page *)); - *pages = kmalloc(array_size, GFP_KERNEL); - if (*pages) { - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - page_count, (rw == READ), 0, - *pages, NULL); - up_read(&current->mm->mmap_sem); - if (result != page_count) { - /* - * If we got fewer pages than expected from - * get_user_pages(), the user buffer runs off the - * end of a mapping; return EFAULT. - */ - if (result >= 0) { - nfs_direct_release_pages(*pages, result); - result = -EFAULT; - } else - kfree(*pages); - *pages = NULL; - } - } - return result; -} - static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -233,13 +211,8 @@ out: } /* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - * - * In addition, synchronous I/O uses a stack-allocated iocb. Thus we - * can't trust the iocb is still valid here if this is a synchronous - * request. If the waiter is woken prematurely, the iocb is long gone.
+ * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust + * the iocb is still valid here if this is a synchronous request. */ static void nfs_direct_complete(struct nfs_direct_req *dreq) { @@ -297,6 +270,11 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) return dreq; } +/* + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + */ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -305,6 +283,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + nfs_direct_dirty_pages(data->pagevec, data->npages); + nfs_direct_release_pages(data->pagevec, data->npages); + spin_lock(&dreq->lock); if (likely(task->tk_status >= 0)) @@ -314,11 +295,8 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) spin_unlock(&dreq->lock); - if (put_dreq(dreq)) { - nfs_direct_dirty_pages(dreq->pages, dreq->npages); - nfs_direct_release_pages(dreq->pages, dreq->npages); + if (put_dreq(dreq)) nfs_direct_complete(dreq); - } } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -328,21 +306,23 @@ static const struct rpc_call_ops nfs_read_direct_ops = { /* * For each nfs_read_data struct that was allocated on the list, dispatch - * an NFS READ operation + * an NFS READ operation. If get_user_pages() fails, we stop sending reads. + * Read length accounting is handled by nfs_direct_read_result(). + * Otherwise, if no requests have been sent, just return an error. 
+ */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int curpage, pgbase; + unsigned int pgbase; + int result; + ssize_t started = 0; + struct nfs_read_data *data; - curpage = 0; pgbase = user_addr & ~PAGE_MASK; do { - struct nfs_read_data *data; size_t bytes; bytes = rsize; @@ -353,13 +333,21 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long data = list_entry(list->next, struct nfs_read_data, pages); list_del_init(&data->pages); + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 1, 0, data->pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (unlikely(result < data->npages)) + goto out_err; + data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.eof = 0; @@ -382,17 +370,36 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); BUG_ON(!list_empty(list)); + return 0; + +out_err: + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + + list_add(&data->pages, list); + while (!list_empty(list)) { + data = list_entry(list->next, struct
nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + if (put_dreq(dreq)) + nfs_direct_complete(dreq); + } + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { ssize_t result; sigset_t oldset; @@ -404,8 +411,6 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size if (!dreq) return -ENOMEM; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -413,8 +418,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq, user_addr, count, pos); - result = nfs_direct_wait(dreq); + result = nfs_direct_read_schedule(dreq, user_addr, count, pos); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -426,9 +432,9 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) while (!list_empty(&dreq->list)) { struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); list_del(&data->pages); + nfs_direct_release_pages(data->pagevec, data->npages); nfs_writedata_release(data); } - nfs_direct_release_pages(dreq->pages, dreq->npages); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -672,21 +678,23 @@ static const struct rpc_call_ops nfs_write_direct_ops = { /* * For each nfs_write_data struct that was allocated on the list, dispatch - * an NFS WRITE operation + * an NFS WRITE operation. If get_user_pages() fails, we stop sending writes. 
+ * Write length accounting is handled by nfs_direct_write_result(). + * Otherwise, if no requests have been sent, just return an error. */ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) +static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int curpage, pgbase; + unsigned int pgbase; + int result; + ssize_t started = 0; + struct nfs_write_data *data; - curpage = 0; pgbase = user_addr & ~PAGE_MASK; do { - struct nfs_write_data *data; size_t bytes; bytes = wsize; @@ -695,6 +703,15 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long BUG_ON(list_empty(list)); data = list_entry(list->next, struct nfs_write_data, pages); + + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 0, 0, data->pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (unlikely(result < data->npages)) + goto out_err; + list_move_tail(&data->pages, &dreq->rewrite_list); data->inode = inode; @@ -703,7 +720,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; @@ -727,17 +744,36 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK;
count -= bytes; } while (count != 0); BUG_ON(!list_empty(list)); + return 0; + +out_err: + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + + list_add(&data->pages, list); + while (!list_empty(list)) { + data = list_entry(list->next, struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); + } + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { ssize_t result; sigset_t oldset; @@ -753,8 +789,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -765,8 +799,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); - result = nfs_direct_wait(dreq); + result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -796,8 +831,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - int page_count; - struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -819,14 +852,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t 
count, if (retval) goto out; - retval = nfs_get_user_pages(READ, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, - pages, page_count); + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos); if (retval > 0) iocb->ki_pos = pos + retval; @@ -862,8 +888,6 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; - int page_count; - struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -891,14 +915,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - retval = nfs_get_user_pages(WRITE, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_write(iocb, (unsigned long) buf, count, - pos, pages, page_count); + retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); /* * XXX: nfs_end_data_update() already ensures this file's -- cgit v1.2.3-18-g5258 From 82b145c5a572f7fa7211dffe2097234dc91bcecc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 20 Jun 2006 12:57:03 -0400 Subject: NFS: alloc nfs_read/write_data as direct I/O is scheduled Re-arrange the logic in the NFS direct I/O path so that nfs_read/write_data structs are allocated just before they are scheduled, rather than allocating them all at once before we start scheduling requests. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 210 ++++++++++++++++++-------------------------------------- 1 file changed, 65 insertions(+), 145 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b1630d53fbb..e25b7595b7a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -68,8 +68,6 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ - struct list_head list, /* nfs_read/write_data structs */ - rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ @@ -82,6 +80,7 @@ struct nfs_direct_req { struct completion completion; /* wait for i/o completion */ /* commit state */ + struct list_head rewrite_list; /* saved nfs_write_data structs */ struct nfs_write_data * commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ @@ -116,6 +115,11 @@ static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) return page_count; } +static inline unsigned int nfs_max_pages(unsigned int size) +{ + return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +} + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -164,8 +168,8 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); + kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; dreq->ctx = NULL; @@ -227,49 +231,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_release); } -/* - * Note we also set the number of requests we have in the dreq when we are - * done. 
This prevents races with I/O completion so we will always wait - * until all requests have been dispatched and completed. - */ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(rpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_read_data, pages); - list_del(&data->pages); - nfs_readdata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - get_dreq(dreq); - if (nbytes <= rsize) - break; - nbytes -= rsize; - } - kref_get(&dreq->kref); - return dreq; -} - /* * We must hold a reference to all the pages in this direct read request * until the RPCs complete. This could be long *after* we are woken up in @@ -305,42 +266,53 @@ static const struct rpc_call_ops nfs_read_direct_ops = { }; /* - * For each nfs_read_data struct that was allocated on the list, dispatch - * an NFS READ operation. If get_user_pages() fails, we stop sending reads. - * Read length accounting is handled by nfs_direct_read_result(). - * Otherwise, if no requests have been sent, just return an error. + * For each rsize'd chunk of the user's buffer, dispatch an NFS READ + * operation. If nfs_readdata_alloc() or get_user_pages() fails, + * bail and stop sending more reads. Read length accounting is + * handled automatically by nfs_direct_read_result(). Otherwise, if + * no requests have been sent, just return an error. 
*/ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; size_t rsize = NFS_SERVER(inode)->rsize; + unsigned int rpages = nfs_max_pages(rsize); unsigned int pgbase; int result; ssize_t started = 0; - struct nfs_read_data *data; + + get_dreq(dreq); pgbase = user_addr & ~PAGE_MASK; do { + struct nfs_read_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_readdata_alloc(rpages); + if (unlikely(!data)) + break; + bytes = rsize; if (count < rsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_read_data, pages); - list_del_init(&data->pages); - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) - goto out_err; + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + get_dreq(dreq); + + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); @@ -378,21 +350,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); - return 0; -out_err: - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); - - list_add(&data->pages, list); - while (!list_empty(list)) { - data = list_entry(list->next, struct nfs_read_data, pages); - list_del(&data->pages); - nfs_readdata_free(data); - if (put_dreq(dreq)) - nfs_direct_complete(dreq); - } + if (put_dreq(dreq)) + nfs_direct_complete(dreq); if (started) return 0; @@ -401,13 +361,13 @@ out_err: static ssize_t nfs_direct_read(struct kiocb *iocb,
unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; - dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; @@ -428,9 +388,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { - list_splice_init(&dreq->rewrite_list, &dreq->list); - while (!list_empty(&dreq->list)) { - struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + while (!list_empty(&dreq->rewrite_list)) { + struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); nfs_direct_release_pages(data->pagevec, data->npages); nfs_writedata_release(data); @@ -584,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif -static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_write_data *data = nfs_writedata_alloc(wpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_write_data, pages); - list_del(&data->pages); - nfs_writedata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - get_dreq(dreq); - if (nbytes <= wsize) - break; - nbytes -= wsize; - } - - nfs_alloc_commit_data(dreq); - - kref_get(&dreq->kref); - return dreq; -} - static void 
nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -677,43 +595,55 @@ static const struct rpc_call_ops nfs_write_direct_ops = { }; /* - * For each nfs_write_data struct that was allocated on the list, dispatch - * an NFS WRITE operation. If get_user_pages() fails, we stop sending writes. - * Write length accounting is handled by nfs_direct_write_result(). - * Otherwise, if no requests have been sent, just return an error. + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. */ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int wpages = nfs_max_pages(wsize); unsigned int pgbase; int result; ssize_t started = 0; - struct nfs_write_data *data; + + get_dreq(dreq); pgbase = user_addr & ~PAGE_MASK; do { + struct nfs_write_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_writedata_alloc(wpages); + if (unlikely(!data)) + break; + bytes = wsize; if (count < wsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_write_data, pages); - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) - goto out_err; + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break;
+ } + + get_dreq(dreq); list_move_tail(&data->pages, &dreq->rewrite_list); + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); @@ -752,21 +682,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); - return 0; - -out_err: - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); - list_add(&data->pages, list); - while (!list_empty(list)) { - data = list_entry(list->next, struct nfs_write_data, pages); - list_del(&data->pages); - nfs_writedata_free(data); - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); - } + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); if (started) return 0; @@ -775,7 +693,7 @@ out_err: static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -783,9 +701,11 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz size_t wsize = NFS_SERVER(inode)->wsize; int sync = 0; - dreq = nfs_direct_write_alloc(count, wsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; + nfs_alloc_commit_data(dreq); + if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; -- cgit v1.2.3-18-g5258 From ccf01ef7aa9c6c293a1c64c27331a2ce227916ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 25 Jun 2006 06:27:31 -0400 Subject: Merge branch 'odirect' --- fs/nfs/direct.c | 435 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 234 insertions(+), 201 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e25b7595b7a..402005c35ab 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -68,19 +68,25 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* 
I/O parameters */ + struct list_head list, /* nfs_read/write_data structs */ + rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ + unsigned long user_addr; /* location of user's buffer */ + size_t user_count; /* total bytes to move */ + loff_t pos; /* starting offset in file */ + struct page ** pages; /* pages in our buffer */ + unsigned int npages; /* count of pages */ /* completion state */ - atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ + int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ /* commit state */ - struct list_head rewrite_list; /* saved nfs_write_data structs */ struct nfs_write_data * commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ @@ -88,37 +94,8 @@ struct nfs_direct_req { struct nfs_writeverf verf; /* unstable write verifier */ }; +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); -static const struct rpc_call_ops nfs_write_direct_ops; - -static inline void get_dreq(struct nfs_direct_req *dreq) -{ - atomic_inc(&dreq->io_count); -} - -static inline int put_dreq(struct nfs_direct_req *dreq) -{ - return atomic_dec_and_test(&dreq->io_count); -} - -/* - * "size" is never larger than rsize or wsize. 
- */ -static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) -{ - int page_count; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - BUG_ON(page_count < 0); - - return page_count; -} - -static inline unsigned int nfs_max_pages(unsigned int size) -{ - return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -142,21 +119,50 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_direct_dirty_pages(struct page **pages, int npages) +static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) { int i; for (i = 0; i < npages; i++) { struct page *page = pages[i]; - if (!PageCompound(page)) + if (do_dirty && !PageCompound(page)) set_page_dirty_lock(page); + page_cache_release(page); } + kfree(pages); } -static void nfs_direct_release_pages(struct page **pages, int npages) +static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) { - int i; - for (i = 0; i < npages; i++) - page_cache_release(pages[i]); + int result = -ENOMEM; + unsigned long page_count; + size_t array_size; + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; + + array_size = (page_count * sizeof(struct page *)); + *pages = kmalloc(array_size, GFP_KERNEL); + if (*pages) { + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + page_count, (rw == READ), 0, + *pages, NULL); + up_read(&current->mm->mmap_sem); + if (result != page_count) { + /* + * If we got fewer pages than expected from + * get_user_pages(), the user buffer runs off the + * end of a mapping; return EFAULT.
+ */ + if (result >= 0) { + nfs_free_user_pages(*pages, result, 0); + result = -EFAULT; + } else + kfree(*pages); + *pages = NULL; + } + } + return result; } static inline struct nfs_direct_req *nfs_direct_req_alloc(void) @@ -168,13 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); - kref_get(&dreq->kref); init_completion(&dreq->completion); + INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; dreq->ctx = NULL; spin_lock_init(&dreq->lock); - atomic_set(&dreq->io_count, 0); + dreq->outstanding = 0; dreq->count = 0; dreq->error = 0; dreq->flags = 0; @@ -215,11 +221,18 @@ out: } /* - * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust - * the iocb is still valid here if this is a synchronous request. + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + * + * In addition, synchronous I/O uses a stack-allocated iocb. Thus we + * can't trust the iocb is still valid here if this is a synchronous + * request. If the waiter is woken prematurely, the iocb is long gone. */ static void nfs_direct_complete(struct nfs_direct_req *dreq) { + nfs_free_user_pages(dreq->pages, dreq->npages, 1); + if (dreq->iocb) { long res = (long) dreq->error; if (!res) @@ -232,10 +245,48 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } /* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). + * Note we also set the number of requests we have in the dreq when we are + * done. This prevents races with I/O completion so we will always wait + * until all requests have been dispatched and completed. 
*/ +static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = &dreq->list; + for(;;) { + struct nfs_read_data *data = nfs_readdata_alloc(rpages); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + dreq->outstanding++; + if (nbytes <= rsize) + break; + nbytes -= rsize; + } + kref_get(&dreq->kref); + return dreq; +} + static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -244,9 +295,6 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; - nfs_direct_dirty_pages(data->pagevec, data->npages); - nfs_direct_release_pages(data->pagevec, data->npages); - spin_lock(&dreq->lock); if (likely(task->tk_status >= 0)) @@ -254,10 +302,13 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else dreq->error = task->tk_status; - spin_unlock(&dreq->lock); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; + } - if (put_dreq(dreq)) - nfs_direct_complete(dreq); + spin_unlock(&dreq->lock); + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -266,60 +317,41 @@ static const struct rpc_call_ops nfs_read_direct_ops = { }; /* - * For each rsize'd chunk of the user's buffer, dispatch an NFS READ - * operation. If nfs_readdata_alloc() or get_user_pages() fails, - * bail and stop sending more reads. 
Read length accounting is - * handled automatically by nfs_direct_read_result(). Otherwise, if - * no requests have been sent, just return an error. + * For each nfs_read_data struct that was allocated on the list, dispatch + * an NFS READ operation */ -static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) +static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int rpages = nfs_max_pages(rsize); - unsigned int pgbase; - int result; - ssize_t started = 0; - - get_dreq(dreq); + unsigned int curpage, pgbase; - pgbase = user_addr & ~PAGE_MASK; + curpage = 0; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; - result = -ENOMEM; - data = nfs_readdata_alloc(rpages); - if (unlikely(!data)) - break; - bytes = rsize; if (count < rsize) bytes = count; - data->npages = nfs_direct_count_pages(user_addr, bytes); - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - data->npages, 1, 0, data->pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) { - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); - break; - } - - get_dreq(dreq); + BUG_ON(list_empty(list)); + data = list_entry(list->next, struct nfs_read_data, pages); + list_del_init(&data->pages); - data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = &pages[curpage]; data->args.count = bytes; data->res.fattr = &data->fattr;
data->res.eof = 0; @@ -342,35 +374,33 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo bytes, (unsigned long long)data->args.offset); - started += bytes; - user_addr += bytes; pos += bytes; pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - - if (put_dreq(dreq)) - nfs_direct_complete(dreq); - - if (started) - return 0; - return result < 0 ? (ssize_t) result : -EFAULT; + BUG_ON(!list_empty(list)); } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) { - ssize_t result = 0; + ssize_t result; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; - dreq = nfs_direct_req_alloc(); + dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); if (!dreq) return -ENOMEM; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; + dreq->pages = pages; + dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -378,9 +408,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - result = nfs_direct_read_schedule(dreq, user_addr, count, pos); - if (!result) - result = nfs_direct_wait(dreq); + nfs_direct_read_schedule(dreq); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -388,10 +417,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { - while (!list_empty(&dreq->rewrite_list)) { - struct nfs_write_data *data = 
list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); + list_splice_init(&dreq->rewrite_list, &dreq->list); + while (!list_empty(&dreq->list)) { + struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); list_del(&data->pages); - nfs_direct_release_pages(data->pagevec, data->npages); nfs_writedata_release(data); } } @@ -399,51 +428,14 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct inode *inode = dreq->inode; - struct list_head *p; - struct nfs_write_data *data; + struct list_head *pos; + list_splice_init(&dreq->rewrite_list, &dreq->list); + list_for_each(pos, &dreq->list) + dreq->outstanding++; dreq->count = 0; - get_dreq(dreq); - - list_for_each(p, &dreq->rewrite_list) { - data = list_entry(p, struct nfs_write_data, pages); - - get_dreq(dreq); - - /* - * Reset data->res. - */ - nfs_fattr_init(&data->fattr); - data->res.count = data->args.count; - memset(&data->verf, 0, sizeof(data->verf)); - - /* - * Reuse data->task; data->args should not have changed - * since the original request was sent. - */ - rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, - &nfs_write_direct_ops, data); - NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); - - data->task.tk_priority = RPC_PRIORITY_NORMAL; - data->task.tk_cookie = (unsigned long) inode; - - /* - * We're called via an RPC callback, so BKL is already held. 
- */ - rpc_execute(&data->task); - - dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - } - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); + nfs_direct_write_schedule(dreq, FLUSH_STABLE); } static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) @@ -480,8 +472,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = 0; - data->args.count = 0; + data->args.offset = dreq->pos; + data->args.count = dreq->user_count; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -543,6 +535,47 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif +static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) +{ + struct list_head *list; + struct nfs_direct_req *dreq; + unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + dreq = nfs_direct_req_alloc(); + if (!dreq) + return NULL; + + list = &dreq->list; + for(;;) { + struct nfs_write_data *data = nfs_writedata_alloc(wpages); + + if (unlikely(!data)) { + while (!list_empty(list)) { + data = list_entry(list->next, + struct nfs_write_data, pages); + list_del(&data->pages); + nfs_writedata_free(data); + } + kref_put(&dreq->kref, nfs_direct_req_release); + return NULL; + } + + INIT_LIST_HEAD(&data->pages); + list_add(&data->pages, list); + + data->req = (struct nfs_page *) dreq; + dreq->outstanding++; + if (nbytes <= wsize) + break; + nbytes -= wsize; + } + + nfs_alloc_commit_data(dreq); + + kref_get(&dreq->kref); + return dreq; +} + static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -572,6 +605,8 @@ static void nfs_direct_write_result(struct 
rpc_task *task, void *calldata) } } } + /* In case we have to resend */ + data->args.stable = NFS_FILE_SYNC; spin_unlock(&dreq->lock); } @@ -585,8 +620,14 @@ static void nfs_direct_write_release(void *calldata) struct nfs_write_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, data->inode); + spin_lock(&dreq->lock); + if (--dreq->outstanding) { + spin_unlock(&dreq->lock); + return; + } + spin_unlock(&dreq->lock); + + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -595,62 +636,41 @@ static const struct rpc_call_ops nfs_write_direct_ops = { }; /* - * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE - * operation. If nfs_writedata_alloc() or get_user_pages() fails, - * bail and stop sending more writes. Write length accounting is - * handled automatically by nfs_direct_write_result(). Otherwise, if - * no requests have been sent, just return an error. 
+ * For each nfs_write_data struct that was allocated on the list, dispatch + * an NFS WRITE operation */ -static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) +static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; + struct list_head *list = &dreq->list; + struct page **pages = dreq->pages; + size_t count = dreq->user_count; + loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int wpages = nfs_max_pages(wsize); - unsigned int pgbase; - int result; - ssize_t started = 0; + unsigned int curpage, pgbase; - get_dreq(dreq); - - pgbase = user_addr & ~PAGE_MASK; + curpage = 0; + pgbase = dreq->user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; - result = -ENOMEM; - data = nfs_writedata_alloc(wpages); - if (unlikely(!data)) - break; - bytes = wsize; if (count < wsize) bytes = count; - data->npages = nfs_direct_count_pages(user_addr, bytes); - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - data->npages, 0, 0, data->pagevec, NULL); - up_read(&current->mm->mmap_sem); - if (unlikely(result < data->npages)) { - if (result > 0) - nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); - break; - } - - get_dreq(dreq); - + BUG_ON(list_empty(list)); + data = list_entry(list->next, struct nfs_write_data, pages); list_move_tail(&data->pages, &dreq->rewrite_list); - data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = &pages[curpage]; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; @@ -674,26 +694,19 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req
*dreq, unsigned l bytes, (unsigned long long)data->args.offset); - started += bytes; - user_addr += bytes; pos += bytes; pgbase += bytes; + curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); - - if (started) - return 0; - return result < 0 ? (ssize_t) result : -EFAULT; + BUG_ON(!list_empty(list)); } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) { - ssize_t result = 0; + ssize_t result; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -701,14 +714,17 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz size_t wsize = NFS_SERVER(inode)->wsize; int sync = 0; - dreq = nfs_direct_req_alloc(); + dreq = nfs_direct_write_alloc(count, wsize); if (!dreq) return -ENOMEM; - nfs_alloc_commit_data(dreq); - if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; + dreq->user_addr = user_addr; + dreq->user_count = count; + dreq->pos = pos; + dreq->pages = pages; + dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -719,9 +735,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); - if (!result) - result = nfs_direct_wait(dreq); + nfs_direct_write_schedule(dreq, sync); + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -751,6 +766,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz ssize_t nfs_file_direct_read(struct kiocb 
*iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -772,7 +789,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos); + retval = nfs_get_user_pages(READ, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, + pages, page_count); if (retval > 0) iocb->ki_pos = pos + retval; @@ -808,6 +832,8 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; + int page_count; + struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -835,7 +861,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); + retval = nfs_get_user_pages(WRITE, (unsigned long) buf, + count, &pages); + if (retval < 0) + goto out; + page_count = retval; + + retval = nfs_direct_write(iocb, (unsigned long) buf, count, + pos, pages, page_count); /* * XXX: nfs_end_data_update() already ensures this file's -- cgit v1.2.3-18-g5258 From d75d54147db9db5194040bd1c5022df6ba36ee48 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 25 Jun 2006 02:41:26 -0700 Subject: git-nfs-build-fixes Fix various problems with nfs4 disabled. And various other things. 
In file included from fs/nfs/inode.c:50: fs/nfs/internal.h:24: error: static declaration of 'nfs_do_refmount' follows non-static declaration include/linux/nfs_fs.h:320: error: previous declaration of 'nfs_do_refmount' was here fs/nfs/internal.h:65: warning: 'struct nfs4_fs_locations' declared inside parameter list fs/nfs/internal.h:65: warning: its scope is only this definition or declaration, which is probably not what you want fs/nfs/internal.h: In function 'nfs4_path': fs/nfs/internal.h:97: error: 'struct nfs_server' has no member named 'mnt_path' fs/nfs/inode.c: In function 'init_once': fs/nfs/inode.c:1116: error: 'struct nfs_inode' has no member named 'open_states' fs/nfs/inode.c:1116: error: 'struct nfs_inode' has no member named 'delegation' fs/nfs/inode.c:1116: error: 'struct nfs_inode' has no member named 'delegation_state' fs/nfs/inode.c:1116: error: 'struct nfs_inode' has no member named 'rwsem' distcc[26452] ERROR: compile fs/nfs/inode.c on g5/64 failed make[1]: *** [fs/nfs/inode.o] Error 1 make: *** [fs/nfs/inode.o] Error 2 make: *** Waiting for unfinished jobs.... 
In file included from fs/nfs/nfs3xdr.c:26: fs/nfs/internal.h:24: error: static declaration of 'nfs_do_refmount' follows non-static declaration include/linux/nfs_fs.h:320: error: previous declaration of 'nfs_do_refmount' was here fs/nfs/internal.h:65: warning: 'struct nfs4_fs_locations' declared inside parameter list fs/nfs/internal.h:65: warning: its scope is only this definition or declaration, which is probably not what you want fs/nfs/internal.h: In function 'nfs4_path': fs/nfs/internal.h:97: error: 'struct nfs_server' has no member named 'mnt_path' distcc[26486] ERROR: compile fs/nfs/nfs3xdr.c on g5/64 failed make[1]: *** [fs/nfs/nfs3xdr.o] Error 1 make: *** [fs/nfs/nfs3xdr.o] Error 2 In file included from fs/nfs/nfs3proc.c:24: fs/nfs/internal.h:24: error: static declaration of 'nfs_do_refmount' follows non-static declaration include/linux/nfs_fs.h:320: error: previous declaration of 'nfs_do_refmount' was here fs/nfs/internal.h:65: warning: 'struct nfs4_fs_locations' declared inside parameter list fs/nfs/internal.h:65: warning: its scope is only this definition or declaration, which is probably not what you want fs/nfs/internal.h: In function 'nfs4_path': fs/nfs/internal.h:97: error: 'struct nfs_server' has no member named 'mnt_path' distcc[26469] ERROR: compile fs/nfs/nfs3proc.c on bix/32 failed make[1]: *** [fs/nfs/nfs3proc.o] Error 1 make: *** [fs/nfs/nfs3proc.o] Error 2 **FAILED** Cc: Alexey Dobriyan Cc: Andreas Gruenbacher Cc: Andy Adamson Cc: Chuck Lever Cc: David Howells Cc: J. 
Bruce Fields Cc: Manoj Naik Cc: Marc Eshel Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 16 +++++++++------- fs/nfs/internal.h | 9 ++++++++- fs/nfs/nfs2xdr.c | 2 ++ 3 files changed, 19 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 24a7139d344..51bc88b662f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1089,13 +1089,15 @@ void nfs_destroy_inode(struct inode *inode) kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } -#define nfs4_init_once(nfsi) \ - do { \ - INIT_LIST_HEAD(&(nfsi)->open_states); \ - nfsi->delegation = NULL; \ - nfsi->delegation_state = 0; \ - init_rwsem(&nfsi->rwsem); \ - } while(0) +static inline void nfs4_init_once(struct nfs_inode *nfsi) +{ +#ifdef CONFIG_NFS_V4 + INIT_LIST_HEAD(&nfsi->open_states); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + init_rwsem(&nfsi->rwsem); +#endif +} static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5e51c4535b6..bd2815e2dec 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -58,11 +58,13 @@ extern int nfs_stat_to_errno(int); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); /* nfs4proc.c */ +#ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, struct nfs4_fs_locations *fs_locations, struct page *page); +#endif /* inode.c */ extern struct inode *nfs_alloc_inode(struct super_block *sb); @@ -92,9 +94,14 @@ extern char *nfs_path(const char *base, const struct dentry *dentry, /* * Determine the mount path as a string */ -static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +static inline char * +nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) { +#ifdef CONFIG_NFS_V4 return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, 
buflen); +#else + return NULL; +#endif } /* diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 67391eef6b9..3b939e055a0 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -25,6 +25,8 @@ #include #include "internal.h" +#include "internal.h" + #define NFSDBG_FACILITY NFSDBG_XDR /* #define NFS_PARANOIA 1 */ -- cgit v1.2.3-18-g5258 From 6ab86aa13045e7f6742af0b3c3c45f952f9fbb8d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 25 Jun 2006 02:41:27 -0700 Subject: nfs-build-fix-99 fs/built-in.o:(__param+0x20): undefined reference to `nfs_idmap_cache_timeout' fs/built-in.o:(__param+0x48): undefined reference to `nfs_callback_set_tcpport' Cc: Alexey Dobriyan Cc: Andreas Gruenbacher Cc: Andy Adamson Cc: Chuck Lever Cc: David Howells Cc: J. Bruce Fields Cc: Manoj Naik Cc: Marc Eshel Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b977748553d..e8a9bee74d9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -187,6 +187,7 @@ static struct super_operations nfs4_sops = { }; #endif +#ifdef CONFIG_NFS_V4 static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; @@ -202,7 +203,9 @@ static int param_set_port(const char *val, struct kernel_param *kp) module_param_call(callback_tcpport, param_set_port, param_get_int, &nfs_callback_set_tcpport, 0644); +#endif +#ifdef CONFIG_NFS_V4 static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) { char *endp; @@ -216,6 +219,7 @@ static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, &nfs_idmap_cache_timeout, 0644); +#endif /* * Register the NFS filesystems -- cgit v1.2.3-18-g5258 From 9bf2aa129a107a0e9e2a5318d35aca731ae7e666 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 25 Jun 2006 02:41:28 -0700 Subject: nfs: remove 
nfs_put_link() Signed-off-by: Alexey Dobriyan Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/symlink.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 636c479995b..600bbe630ab 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -75,22 +75,13 @@ read_failed: return NULL; } -static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) -{ - if (cookie) { - struct page *page = cookie; - kunmap(page); - page_cache_release(page); - } -} - /* * symlinks can't do much... */ struct inode_operations nfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = nfs_follow_link, - .put_link = nfs_put_link, + .put_link = page_put_link, .getattr = nfs_getattr, .setattr = nfs_setattr, }; -- cgit v1.2.3-18-g5258