diff options
Diffstat (limited to 'fs/nfs/blocklayout/blocklayout.c')
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 202 |
1 files changed, 131 insertions, 71 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 281ae95932c..48cfac31f64 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) */ struct parallel_io { struct kref refcnt; - struct rpc_call_ops call_ops; - void (*pnfs_callback) (void *data); + void (*pnfs_callback) (void *data, int num_se); void *data; + int bse_count; }; static inline struct parallel_io *alloc_parallel(void *data) @@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) if (rv) { rv->data = data; kref_init(&rv->refcnt); + rv->bse_count = 0; } return rv; } @@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref) struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); dprintk("%s enter\n", __func__); - p->pnfs_callback(p->data); + p->pnfs_callback(p->data, p->bse_count); kfree(p); } @@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, { struct bio *bio; + npg = min(npg, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, npg); - if (!bio) - return NULL; + if (!bio && (current->flags & PF_MEMALLOC)) { + while (!bio && (npg /= 2)) + bio = bio_alloc(GFP_NOIO, npg); + } - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; - bio->bi_bdev = be->be_mdev; - bio->bi_end_io = end_io; - bio->bi_private = par; + if (bio) { + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = end_io; + bio->bi_private = par; + } return bio; } @@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work) } static void -bl_end_par_io_read(void *data) +bl_end_par_io_read(void *data, int unused) { struct nfs_read_data *rdata = data; + rdata->task.tk_status = rdata->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); schedule_work(&rdata->task.u.tk_work); } -/* We don't want normal .rpc_call_done callback used, so we replace it - * with this stub. - */ -static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata) -{ - return; -} - static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata) par = alloc_parallel(rdata); if (!par) goto use_mds; - par->call_ops = *rdata->mds_ops; - par->call_ops.rpc_call_done = bl_rpc_do_nothing; par->pnfs_callback = bl_end_par_io_read; /* At this point, we can no longer jump to use_mds */ @@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, { sector_t isect, end; struct pnfs_block_extent *be; + struct pnfs_block_short_extent *se; dprintk("%s(%llu, %u)\n", __func__, offset, count); if (count == 0) @@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, be = bl_find_get_extent(bl, isect, NULL); BUG_ON(!be); /* FIXME */ len = min(end, be->be_f_offset + be->be_length) - isect; - if (be->be_state == PNFS_BLOCK_INVALID_DATA) - bl_mark_for_commit(be, isect, len); /* What if fails? */ + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + se = bl_pop_one_short_extent(be->be_inval); + BUG_ON(!se); + bl_mark_for_commit(be, isect, len, se); + } isect += len; bl_put_extent(be); } @@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) end_page_writeback(page); page_cache_release(page); } while (bvec >= bio->bi_io_vec); - if (!uptodate) { + + if (unlikely(!uptodate)) { if (!wdata->pnfs_error) wdata->pnfs_error = -EIO; pnfs_set_lo_fail(wdata->lseg); @@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) put_parallel(par); } -/* This is basically copied from mpage_end_io_read */ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; @@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work) dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); wdata = container_of(task, struct nfs_write_data, task); - if (!wdata->pnfs_error) { + if (likely(!wdata->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ mark_extents_written(BLK_LSEG2EXT(wdata->lseg), wdata->args.offset, wdata->args.count); @@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work) } /* Called when last of bios associated with a bl_write_pagelist call finishes */ -static void bl_end_par_io_write(void *data) +static void bl_end_par_io_write(void *data, int num_se) { struct nfs_write_data *wdata = data; - wdata->task.tk_status = 0; + if (unlikely(wdata->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, + num_se); + } + + wdata->task.tk_status = wdata->pnfs_error; wdata->verf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); @@ -484,6 +490,55 @@ cleanup: return ret; } +/* Find or create a zeroing page marked being writeback. + * Return ERR_PTR on error, NULL to indicate skip this page and page itself + * to indicate write out. + */ +static struct page * +bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, + struct pnfs_block_extent *cow_read) +{ + struct page *page; + int locked = 0; + page = find_get_page(inode->i_mapping, index); + if (page) + goto check_page; + + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + if (unlikely(!page)) { + dprintk("%s oom\n", __func__); + return ERR_PTR(-ENOMEM); + } + locked = 1; + +check_page: + /* PageDirty: Other will write this out + * PageWriteback: Other is writing this out + * PageUptodate: It was read before + */ + if (PageDirty(page) || PageWriteback(page)) { + print_page(page); + if (locked) + unlock_page(page); + page_cache_release(page); + return NULL; + } + + if (!locked) { + lock_page(page); + locked = 1; + goto check_page; + } + if (!PageUptodate(page)) { + /* New page, readin or zero it */ + init_page_for_write(page, cow_read); + } + set_page_writeback(page); + unlock_page(page); + + return page; +} + static enum pnfs_try_status bl_write_pagelist(struct nfs_write_data *wdata, int sync) { @@ -508,9 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) */ par = alloc_parallel(wdata); if (!par) - return PNFS_NOT_ATTEMPTED; - par->call_ops = *wdata->mds_ops; - par->call_ops.rpc_call_done = bl_rpc_do_nothing; + goto out_mds; par->pnfs_callback = bl_end_par_io_write; /* At this point, have to be more careful with error handling */ @@ -518,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); if (!be || !is_writable(be, isect)) { dprintk("%s no matching extents!\n", __func__); - wdata->pnfs_error = -EINVAL; - goto out; + goto out_mds; } /* First page inside INVALID extent */ if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + if (likely(!bl_push_one_short_extent(be->be_inval))) + par->bse_count++; + else + goto out_mds; temp = offset >> PAGE_CACHE_SHIFT; npg_zero = do_div(temp, npg_per_block); isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & @@ -543,36 +599,16 @@ fill_invalid_ext: dprintk("%s zero %dth page: index %lu isect %llu\n", __func__, npg_zero, index, (unsigned long long)isect); - page = - find_or_create_page(wdata->inode->i_mapping, index, - GFP_NOFS); - if (!page) { - dprintk("%s oom\n", __func__); - wdata->pnfs_error = -ENOMEM; + page = bl_find_get_zeroing_page(wdata->inode, index, + cow_read); + if (unlikely(IS_ERR(page))) { + wdata->pnfs_error = PTR_ERR(page); goto out; - } - - /* PageDirty: Other will write this out - * PageWriteback: Other is writing this out - * PageUptodate: It was read before - * sector_initialized: already written out - */ - if (PageDirty(page) || PageWriteback(page)) { - print_page(page); - unlock_page(page); - page_cache_release(page); + } else if (page == NULL) goto next_page; - } - if (!PageUptodate(page)) { - /* New page, readin or zero it */ - init_page_for_write(page, cow_read); - } - set_page_writeback(page); - unlock_page(page); ret = bl_mark_sectors_init(be->be_inval, isect, - PAGE_CACHE_SECTORS, - NULL); + PAGE_CACHE_SECTORS); if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); @@ -581,6 +617,19 @@ fill_invalid_ext: wdata->pnfs_error = ret; goto out; } + if (likely(!bl_push_one_short_extent(be->be_inval))) + par->bse_count++; + else { + end_page_writeback(page); + page_cache_release(page); + wdata->pnfs_error = -ENOMEM; + goto out; + } + /* FIXME: This should be done in bi_end_io */ + mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE); + bio = bl_add_page_to_bio(bio, npg_zero, WRITE, isect, page, be, bl_end_io_write_zero, par); @@ -589,10 +638,6 @@ fill_invalid_ext: bio = NULL; goto out; } - /* FIXME: This should be done in bi_end_io */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), - page->index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE); next_page: isect += PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS; @@ -616,13 +661,21 @@ next_page: wdata->pnfs_error = -EINVAL; goto out; } + if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + if (likely(!bl_push_one_short_extent( + be->be_inval))) + par->bse_count++; + else { + wdata->pnfs_error = -ENOMEM; + goto out; + } + } extent_length = be->be_length - (isect - be->be_f_offset); } if (be->be_state == PNFS_BLOCK_INVALID_DATA) { ret = bl_mark_sectors_init(be->be_inval, isect, - PAGE_CACHE_SECTORS, - NULL); + PAGE_CACHE_SECTORS); if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); @@ -664,6 +717,10 @@ out: bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; +out_mds: + bl_put_extent(be); + kfree(par); + return PNFS_NOT_ATTEMPTED; } /* FIXME - range ignored */ @@ -690,11 +747,17 @@ static void release_inval_marks(struct pnfs_inval_markings *marks) { struct pnfs_inval_tracking *pos, *temp; + struct pnfs_block_short_extent *se, *stemp; list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { list_del(&pos->it_link); kfree(pos); } + + list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { + list_del(&se->bse_node); + kfree(se); + } return; } @@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) static void free_blk_mountid(struct block_mount_id *mid) { if (mid) { - struct pnfs_block_dev *dev; - spin_lock(&mid->bm_lock); - while (!list_empty(&mid->bm_devlist)) { - dev = list_first_entry(&mid->bm_devlist, - struct pnfs_block_dev, - bm_node); + struct pnfs_block_dev *dev, *tmp; + + /* No need to take bm_lock as we are last user freeing bm_devlist */ + list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { list_del(&dev->bm_node); bl_free_block_dev(dev); } - spin_unlock(&mid->bm_lock); kfree(mid); } } |