Diffstat (limited to 'fs/nfs/read.c')
-rw-r--r--   fs/nfs/read.c | 530
1 file changed, 147 insertions(+), 383 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index db9b360ae19..e818a475ca6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,59 +18,29 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-
-#include <asm/system.h>
+#include <linux/module.h>
 #include "nfs4_fs.h"
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
+#include "pnfs.h"
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
-static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
-static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
 static struct kmem_cache *nfs_rdata_cachep;
-static mempool_t *nfs_rdata_mempool;
-
-#define MIN_POOL_READ	(32)
-
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
-{
-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
-
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-		p->npages = pagecount;
-		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
-			p->pagevec = p->page_array;
-		else {
-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-			if (!p->pagevec) {
-				mempool_free(p, nfs_rdata_mempool);
-				p = NULL;
-			}
-		}
-	}
-	return p;
-}
-void nfs_readdata_free(struct nfs_read_data *p)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	mempool_free(p, nfs_rdata_mempool);
+	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 }
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
 {
-	put_nfs_open_context(rdata->args.context);
-	nfs_readdata_free(rdata);
+	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
 static
@@ -82,46 +52,40 @@ int nfs_return_empty_page(struct page *page)
 	return 0;
 }
-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+			  struct inode *inode, bool force_mds,
+			  const struct nfs_pgio_completion_ops *compl_ops)
 {
-	unsigned int remainder = data->args.count - data->res.count;
-	unsigned int base = data->args.pgbase + data->res.count;
-	unsigned int pglen;
-	struct page **pages;
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+			server->rsize, 0);
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
-	if (data->res.eof == 0 || remainder == 0)
-		return;
-	/*
-	 * Note: "remainder" can never be negative, since we check for
-	 * this in the XDR code.
-	 */
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	pglen = PAGE_CACHE_SIZE - base;
-	for (;;) {
-		if (remainder <= pglen) {
-			zero_user(*pages, base, remainder);
-			break;
-		}
-		zero_user(*pages, base, pglen);
-		pages++;
-		remainder -= pglen;
-		pglen = PAGE_CACHE_SIZE;
-		base = 0;
-	}
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+	pgio->pg_ops = &nfs_pgio_rw_ops;
+	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
 }
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 		       struct page *page)
 {
-	LIST_HEAD(one_request);
 	struct nfs_page *new;
 	unsigned int len;
+	struct nfs_pageio_descriptor pgio;
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, inode, page, 0, len);
+	new = nfs_create_request(ctx, page, NULL, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -129,369 +93,171 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
-	nfs_list_add_request(new, &one_request);
-	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
-		nfs_pagein_multi(inode, &one_request, 1, len, 0);
-	else
-		nfs_pagein_one(inode, &one_request, 1, len, 0);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
+	nfs_pageio_add_request(&pgio, new);
+	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	return 0;
 }
 static void nfs_readpage_release(struct nfs_page *req)
 {
-	struct inode *d_inode = req->wb_context->path.dentry->d_inode;
+	struct inode *d_inode = req->wb_context->dentry->d_inode;
-	if (PageUptodate(req->wb_page))
-		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+		(long long)req_offset(req));
-	unlock_page(req->wb_page);
+	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+		if (PageUptodate(req->wb_page))
+			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
-	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
-		req->wb_context->path.dentry->d_inode->i_sb->s_id,
-		(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+		unlock_page(req->wb_page);
+	}
+
+	dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
+		req->wb_context->dentry->d_inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
 		req->wb_bytes, (long long)req_offset(req));
-	nfs_clear_request(req);
 	nfs_release_request(req);
 }
-/*
- * Set up the NFS read request struct
- */
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
-		const struct rpc_call_ops *call_ops,
-		unsigned int count, unsigned int offset)
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->path.dentry->d_inode;
-	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = req->wb_context->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.task = &data->task,
-		.rpc_client = NFS_CLIENT(inode),
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | swap_flags,
-	};
-
-	data->req = req;
-	data->inode = inode;
-	data->cred = msg.rpc_cred;
-
-	data->args.fh = NFS_FH(inode);
-	data->args.offset = req_offset(req) + offset;
-	data->args.pgbase = req->wb_pgbase + offset;
-	data->args.pages = data->pagevec;
-	data->args.count = count;
-	data->args.context = get_nfs_open_context(req->wb_context);
-
-	data->res.fattr = &data->fattr;
-	data->res.count = count;
-	data->res.eof = 0;
-	nfs_fattr_init(&data->fattr);
-
-	/* Set up the initial task struct. */
-	NFS_PROTO(inode)->read_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
-		data->task.tk_pid,
-		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
-		count,
-		(unsigned long long)data->args.offset);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-	rpc_put_task(task);
-	return 0;
+	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+		SetPageUptodate(req->wb_page);
 }
-static void
-nfs_async_read_error(struct list_head *head)
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
-	struct nfs_page *req;
+	unsigned long bytes = 0;
-	while (!list_empty(head)) {
-		req = nfs_list_entry(head->next);
+	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+		goto out;
+	while (!list_empty(&hdr->pages)) {
+		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+		struct page *page = req->wb_page;
+		unsigned long start = req->wb_pgbase;
+		unsigned long end = req->wb_pgbase + req->wb_bytes;
+
+		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+			/* note: regions of the page not covered by a
+			 * request are zeroed in nfs_readpage_async /
+			 * readpage_async_filler */
+			if (bytes > hdr->good_bytes) {
+				/* nothing in this request was good, so zero
+				 * the full extent of the request */
+				zero_user_segment(page, start, end);
+
+			} else if (hdr->good_bytes - bytes < req->wb_bytes) {
+				/* part of this request has good bytes, but
+				 * not all. zero the bad bytes */
+				start += hdr->good_bytes - bytes;
+				WARN_ON(start < req->wb_pgbase);
+				zero_user_segment(page, start, end);
+			}
+		}
+		bytes += req->wb_bytes;
+		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+			if (bytes <= hdr->good_bytes)
+				nfs_page_group_set_uptodate(req);
+		} else
+			nfs_page_group_set_uptodate(req);
 		nfs_list_remove_request(req);
-		SetPageError(req->wb_page);
 		nfs_readpage_release(req);
 	}
+out:
+	hdr->release(hdr);
 }
-/*
- * Generate multiple requests to fill a single page.
- *
- * We optimize to reduce the number of read operations on the wire. If we
- * detect that we're reading a page, or an area of a page, that is past the
- * end of file, we do not generate NFS read operations but just clear the
- * parts of the page that would have come back zero from the server anyway.
- *
- * We rely on the cached value of i_size to make this determination; another
- * client can fill pages on the server past our cached end-of-file, but we
- * won't see the new data until our attribute cache is updated. This is more
- * or less conventional NFS client behavior.
- */
-static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+			      struct rpc_task_setup *task_setup_data, int how)
 {
-	struct nfs_page *req = nfs_list_entry(head->next);
-	struct page *page = req->wb_page;
-	struct nfs_read_data *data;
-	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
-	unsigned int offset;
-	int requests = 0;
-	int ret = 0;
-	LIST_HEAD(list);
-
-	nfs_list_remove_request(req);
-
-	nbytes = count;
-	do {
-		size_t len = min(nbytes,rsize);
-
-		data = nfs_readdata_alloc(1);
-		if (!data)
-			goto out_bad;
-		list_add(&data->pages, &list);
-		requests++;
-		nbytes -= len;
-	} while(nbytes != 0);
-	atomic_set(&req->wb_complete, requests);
-
-	ClearPageError(page);
-	offset = 0;
-	nbytes = count;
-	do {
-		int ret2;
-
-		data = list_entry(list.next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
-
-		data->pagevec[0] = page;
-
-		if (nbytes < rsize)
-			rsize = nbytes;
-		ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
-				rsize, offset);
-		if (ret == 0)
-			ret = ret2;
-		offset += rsize;
-		nbytes -= rsize;
-	} while (nbytes != 0);
-
-	return ret;
+	struct inode *inode = data->header->inode;
+	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-out_bad:
-	while (!list_empty(&list)) {
-		data = list_entry(list.next, struct nfs_read_data, pages);
-		list_del(&data->pages);
-		nfs_readdata_free(data);
-	}
-	SetPageError(page);
-	nfs_readpage_release(req);
-	return -ENOMEM;
+	task_setup_data->flags |= swap_flags;
+	NFS_PROTO(inode)->read_setup(data, msg);
 }
-static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
+static void
+nfs_async_read_error(struct list_head *head)
 {
-	struct nfs_page *req;
-	struct page **pages;
-	struct nfs_read_data *data;
-	int ret = -ENOMEM;
-
-	data = nfs_readdata_alloc(npages);
-	if (!data)
-		goto out_bad;
+	struct nfs_page *req;
-	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &data->pages);
-		ClearPageError(req->wb_page);
-		*pages++ = req->wb_page;
+		nfs_readpage_release(req);
 	}
-	req = nfs_list_entry(data->pages.next);
-
-	return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
-out_bad:
-	nfs_async_read_error(head);
-	return ret;
 }
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+	.error_cleanup = nfs_async_read_error,
+	.completion = nfs_read_completion,
+};
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			     struct inode *inode)
 {
-	int status;
-
-	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
-		task->tk_status);
-
-	status = NFS_PROTO(data->inode)->read_done(task, data);
+	int status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
 	if (task->tk_status == -ESTALE) {
-		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
-		nfs_mark_for_revalidate(data->inode);
+		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+		nfs_mark_for_revalidate(inode);
 	}
 	return 0;
 }
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
-
-	if (resp->eof || resp->count == argp->count)
-		return;
+	struct nfs_pgio_args *argp = &data->args;
+	struct nfs_pgio_res *resp = &data->res;
 	/* This is a short read! */
-	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
-	if (resp->count == 0)
+	if (resp->count == 0) {
+		nfs_set_pgio_error(data->header, -EIO, argp->offset);
 		return;
-
+	}
 	/* Yes, so retry the read at the end of the data */
+	data->mds_offset += resp->count;
 	argp->offset += resp->count;
 	argp->pgbase += resp->count;
 	argp->count -= resp->count;
-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
-}
-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		return;
-
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_retry(task, data);
+	rpc_restart_call_prepare(task);
 }
-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->req;
-	struct page *page = req->wb_page;
-	int status = data->task.tk_status;
+	struct nfs_pgio_header *hdr = data->header;
-	if (status < 0)
-		SetPageError(page);
+	if (data->res.eof) {
+		loff_t bound;
-	if (atomic_dec_and_test(&req->wb_complete)) {
-		if (!PageError(page))
-			SetPageUptodate(page);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(calldata);
-}
-
-#if defined(CONFIG_NFS_V4_1)
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
-				&data->args.seq_args, &data->res.seq_res,
-				0, task))
-		return;
-	rpc_call_start(task);
-}
-#endif /* CONFIG_NFS_V4_1 */
-
-static const struct rpc_call_ops nfs_read_partial_ops = {
-#if defined(CONFIG_NFS_V4_1)
-	.rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
-	.rpc_call_done = nfs_readpage_result_partial,
-	.rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
-	unsigned int count = data->res.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	if (data->res.eof)
-		count = data->args.count;
-	if (unlikely(count == 0))
-		return;
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageUptodate(*pages);
-	if (count == 0)
-		return;
-	/* Was this a short read? */
-	if (data->res.eof || data->res.count == data->args.count)
-		SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		return;
-	/*
-	 * Note: nfs_readpage_retry may change the values of
-	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call nfs_readpage_set_pages_uptodate()
-	 * first.
-	 */
-	nfs_readpage_truncate_uninitialised_page(data);
-	nfs_readpage_set_pages_uptodate(data);
-	nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
-	struct nfs_read_data *data = calldata;
-
-	while (!list_empty(&data->pages)) {
-		struct nfs_page *req = nfs_list_entry(data->pages.next);
-
-		nfs_list_remove_request(req);
-		nfs_readpage_release(req);
-	}
-	nfs_readdata_release(calldata);
+		bound = data->args.offset + data->res.count;
+		spin_lock(&hdr->lock);
+		if (bound < hdr->io_start + hdr->good_bytes) {
+			set_bit(NFS_IOHDR_EOF, &hdr->flags);
+			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+			hdr->good_bytes = bound - hdr->io_start;
+		}
+		spin_unlock(&hdr->lock);
+	} else if (data->res.count != data->args.count)
+		nfs_readpage_retry(task, data);
 }
-static const struct rpc_call_ops nfs_read_full_ops = {
-#if defined(CONFIG_NFS_V4_1)
-	.rpc_call_prepare = nfs_read_prepare,
-#endif /* CONFIG_NFS_V4_1 */
-	.rpc_call_done = nfs_readpage_result_full,
-	.rpc_release = nfs_readpage_release_full,
-};
-
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -501,11 +267,11 @@ static const struct rpc_call_ops nfs_read_full_ops = {
 int nfs_readpage(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	int error;
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
-		page, PAGE_CACHE_SIZE, page->index);
+		page, PAGE_CACHE_SIZE, page_file_index(page));
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
 	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
@@ -559,7 +325,6 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page->mapping->host;
 	struct nfs_page *new;
 	unsigned int len;
 	int error;
@@ -568,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, NULL, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
@@ -581,7 +346,6 @@ readpage_async_filler(void *data, struct page *page)
 	return 0;
 out_error:
 	error = PTR_ERR(new);
-	SetPageError(page);
 out_unlock:
 	unlock_page(page);
 	return error;
@@ -595,14 +359,12 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 		.pgio = &pgio,
 	};
 	struct inode *inode = mapping->host;
-	struct nfs_server *server = NFS_SERVER(inode);
-	size_t rsize = server->rsize;
 	unsigned long npages;
 	int ret = -ESTALE;
-	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+	dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
 		inode->i_sb->s_id,
-		(long long)NFS_FILEID(inode),
+		(unsigned long long)NFS_FILEID(inode),
 		nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
@@ -624,14 +386,13 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
-	if (rsize < PAGE_CACHE_SIZE)
-		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
-	else
-		nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+	nfs_pageio_init_read(&pgio, inode, false,
+			&nfs_async_read_completion_ops);
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 	nfs_pageio_complete(&pgio);
+	NFS_I(inode)->read_io += pgio.pg_bytes_written;
 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
@@ -643,22 +404,25 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_data),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
 		return -ENOMEM;
-	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
-						     nfs_rdata_cachep);
-	if (nfs_rdata_mempool == NULL)
-		return -ENOMEM;
-
	return 0;
 }
 void nfs_destroy_readpagecache(void)
 {
-	mempool_destroy(nfs_rdata_mempool);
 	kmem_cache_destroy(nfs_rdata_cachep);
 }
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_mode = FMODE_READ,
+	.rw_alloc_header = nfs_readhdr_alloc,
+	.rw_free_header = nfs_readhdr_free,
+	.rw_done = nfs_readpage_done,
+	.rw_result = nfs_readpage_result,
+	.rw_initiate = nfs_initiate_read,
+};
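
The nfs_read_completion() hunk above replaces the old full/partial RPC callback pairs with a single completion routine that walks hdr->pages and uses hdr->good_bytes to decide how much of each request must be zeroed when the reply hit end-of-file early. The stand-alone sketch below models only that accounting; the fake_req/fake_hdr structs and the sample byte counts are illustrative stand-ins, not the kernel's nfs_page/nfs_pgio_header definitions.

/* Minimal model of the good_bytes accounting in nfs_read_completion();
 * the structs below are illustrative stand-ins, not kernel definitions. */
#include <stdio.h>

struct fake_req {                 /* stands in for struct nfs_page */
	unsigned long pgbase;     /* start of the request inside its page */
	unsigned long bytes;      /* length of the request */
};

struct fake_hdr {                 /* stands in for struct nfs_pgio_header */
	int eof;                  /* the reply reported end-of-file */
	int error;                /* an error was recorded for the I/O */
	unsigned long good_bytes; /* bytes of the I/O known to be valid */
};

static void complete_requests(const struct fake_hdr *hdr,
			      const struct fake_req *reqs, int nr)
{
	unsigned long bytes = 0;  /* running total, as in the kernel loop */

	for (int i = 0; i < nr; i++) {
		unsigned long start = reqs[i].pgbase;
		unsigned long end = reqs[i].pgbase + reqs[i].bytes;

		if (hdr->eof) {
			if (bytes > hdr->good_bytes)
				printf("req %d: zero %lu..%lu (nothing good)\n",
				       i, start, end);
			else if (hdr->good_bytes - bytes < reqs[i].bytes)
				printf("req %d: zero %lu..%lu (tail past EOF)\n",
				       i, start + (hdr->good_bytes - bytes), end);
		}
		bytes += reqs[i].bytes;
		if (!hdr->error || bytes <= hdr->good_bytes)
			printf("req %d: page group may be marked uptodate\n", i);
	}
}

int main(void)
{
	/* Two 4096-byte requests, but only 6000 good bytes arrived before
	 * EOF: the second request gets its tail zeroed. */
	struct fake_req reqs[2] = { { 0, 4096 }, { 0, 4096 } };
	struct fake_hdr hdr = { .eof = 1, .error = 0, .good_bytes = 6000 };

	complete_requests(&hdr, reqs, 2);
	return 0;
}

Running this with the sample values shows the second request being zeroed only from byte 1904 onward, which is the partial-page case the comment in the hunk describes.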
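
nfs_readpage_retry() above handles a short read by recording -EIO when the server returned nothing at all, and otherwise sliding the request window past the bytes that did arrive before restarting the RPC. A minimal sketch of that arithmetic, with plain variables standing in for the nfs_pgio_args/nfs_pgio_res fields and made-up sample values:

/* Sketch of the short-read continuation in nfs_readpage_retry();
 * plain variables stand in for data->args / data->res fields. */
#include <stdio.h>

int main(void)
{
	unsigned long long offset = 65536; /* args.offset: where this RPC read   */
	unsigned int pgbase = 0;           /* args.pgbase: offset into the pages  */
	unsigned int count = 16384;        /* args.count: bytes still wanted      */
	unsigned int res_count = 4096;     /* res.count: what the reply carried   */

	if (res_count == 0) {
		/* no progress at all: the kernel records -EIO via
		 * nfs_set_pgio_error() instead of retrying forever */
		printf("error: server made no progress\n");
		return 1;
	}

	/* resume immediately after the bytes that did arrive, then the
	 * kernel restarts the RPC with rpc_restart_call_prepare() */
	offset += res_count;
	pgbase += res_count;
	count  -= res_count;

	printf("retry: offset=%llu pgbase=%u count=%u\n", offset, pgbase, count);
	return 0;
}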
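
When a reply carries the eof flag, the new nfs_readpage_result() clamps hdr->good_bytes so that anything the overall I/O requested beyond the end of that reply is later zeroed rather than treated as an error. A toy calculation of that clamp follows; the variables mirror hdr->io_start, hdr->good_bytes and the RPC args/res, and the offsets are invented for illustration.

/* Sketch of the EOF clamp in nfs_readpage_result(). */
#include <stdio.h>

int main(void)
{
	unsigned long long io_start = 0;        /* file offset of the whole I/O  */
	unsigned long long good_bytes = 131072; /* optimistic initial estimate   */

	unsigned long long args_offset = 65536; /* this RPC started at 64 KiB... */
	unsigned int res_count = 20480;         /* ...and returned 20 KiB + eof  */

	unsigned long long bound = args_offset + res_count;

	if (bound < io_start + good_bytes) {
		/* the file ended inside the I/O: shrink good_bytes and let
		 * the completion path zero (not fail) everything past it */
		good_bytes = bound - io_start;
	}
	printf("good_bytes clamped to %llu\n", good_bytes);
	return 0;
}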
