aboutsummaryrefslogtreecommitdiff
path: root/fs/nfs/blocklayout
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/blocklayout')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c382
-rw-r--r--fs/nfs/blocklayout/blocklayout.h5
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c29
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c12
-rw-r--r--fs/nfs/blocklayout/extents.c5
5 files changed, 322 insertions, 111 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dd392ed5f2e..9b431f44fad 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -37,8 +37,10 @@
#include <linux/bio.h> /* struct bio */
#include <linux/buffer_head.h> /* various write calls */
#include <linux/prefetch.h>
+#include <linux/pagevec.h>
#include "../pnfs.h"
+#include "../nfs4session.h"
#include "../internal.h"
#include "blocklayout.h"
@@ -132,8 +134,8 @@ bl_submit_bio(int rw, struct bio *bio)
if (bio) {
get_parallel(bio->bi_private);
dprintk("%s submitting %s bio %u@%llu\n", __func__,
- rw == READ ? "read" : "write",
- bio->bi_size, (unsigned long long)bio->bi_sector);
+ rw == READ ? "read" : "write", bio->bi_iter.bi_size,
+ (unsigned long long)bio->bi_iter.bi_sector);
submit_bio(rw, bio);
}
return NULL;
@@ -154,7 +156,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
}
if (bio) {
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_iter.bi_sector = isect - be->be_f_offset +
+ be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
@@ -162,42 +165,52 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
return bio;
}
-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
sector_t isect, struct page *page,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
- struct parallel_io *par)
+ struct parallel_io *par,
+ unsigned int offset, int len)
{
+ isect = isect + (offset >> SECTOR_SHIFT);
+ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+ npg, rw, (unsigned long long)isect, offset, len);
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, len, offset) < len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}
+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+ sector_t isect, struct page *page,
+ struct pnfs_block_extent *be,
+ void (*end_io)(struct bio *, int err),
+ struct parallel_io *par)
+{
+ return do_add_page_to_bio(bio, npg, rw, isect, page, be,
+ end_io, par, 0, PAGE_CACHE_SIZE);
+}
+
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec;
+ int i;
- do {
- struct page *page = bvec->bv_page;
+ if (!err)
+ bio_for_each_segment_all(bvec, bio, i)
+ SetPageUptodate(bvec->bv_page);
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
- if (uptodate)
- SetPageUptodate(page);
- } while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
- struct nfs_read_data *rdata = par->data;
+ if (err) {
+ struct nfs_pgio_data *rdata = par->data;
struct nfs_pgio_header *header = rdata->header;
if (!header->pnfs_error)
@@ -211,33 +224,25 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
-static bool
-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
-{
- if ((offset & blkmask) || (len & blkmask))
- return false;
- return true;
-}
-
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *header = rdata->header;
int i, hole;
@@ -246,15 +251,15 @@ bl_read_pagelist(struct nfs_read_data *rdata)
sector_t isect, extent_length = 0;
struct parallel_io *par;
loff_t f_offset = rdata->args.offset;
+ size_t bytes_left = rdata->args.count;
+ unsigned int pg_offset, pg_len;
struct page **pages = rdata->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
- if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
- goto use_mds;
-
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
@@ -284,36 +289,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
extent_length = min(extent_length, cow_length);
}
}
+
+ if (is_dio) {
+ pg_offset = f_offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = bytes_left;
+
+ f_offset += pg_len;
+ bytes_left -= pg_len;
+ isect += (pg_offset >> SECTOR_SHIFT);
+ } else {
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ }
+
hole = is_hole(be, isect);
if (hole && !cow_read) {
bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__);
- zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+ zero_user_segment(pages[i], pg_offset, pg_len);
print_page(pages[i]);
SetPageUptodate(pages[i]);
} else {
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
READ,
isect, pages[i], be_read,
- bl_end_io_read, par);
+ bl_end_io_read, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
}
- isect += PAGE_CACHE_SECTORS;
+ isect += (pg_len >> SECTOR_SHIFT);
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - rdata->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
+ rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
}
out:
bl_put_extent(be);
@@ -358,21 +380,17 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
static void bl_end_io_write_zero(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
- do {
- struct page *page = bvec->bv_page;
+ struct bio_vec *bvec;
+ int i;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
+ bio_for_each_segment_all(bvec, bio, i) {
/* This is the zeroing page we added */
- end_page_writeback(page);
- page_cache_release(page);
- } while (bvec >= bio->bi_io_vec);
+ end_page_writeback(bvec->bv_page);
+ page_cache_release(bvec->bv_page);
+ }
- if (unlikely(!uptodate)) {
- struct nfs_write_data *data = par->data;
+ if (unlikely(err)) {
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!header->pnfs_error)
@@ -387,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *data = par->data;
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!uptodate) {
@@ -405,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -420,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(wdata->header->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -461,6 +479,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
return;
}
+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct page *page = bvec->bv_page;
+
+ /* Only one page in bvec */
+ unlock_page(page);
+}
+
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int offset, unsigned int len)
+{
+ struct bio *bio;
+ struct page *shadow_page;
+ sector_t isect;
+ char *kaddr, *kshadow_addr;
+ int ret = 0;
+
+ dprintk("%s: offset %u len %u\n", __func__, offset, len);
+
+ shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (shadow_page == NULL)
+ return -ENOMEM;
+
+ bio = bio_alloc(GFP_NOIO, 1);
+ if (bio == NULL)
+ return -ENOMEM;
+
+ isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
+ (offset / SECTOR_SIZE);
+
+ bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_bdev = be->be_mdev;
+ bio->bi_end_io = bl_read_single_end_io;
+
+ lock_page(shadow_page);
+ if (bio_add_page(bio, shadow_page,
+ SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+ unlock_page(shadow_page);
+ bio_put(bio);
+ return -EIO;
+ }
+
+ submit_bio(READ, bio);
+ wait_on_page_locked(shadow_page);
+ if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+ ret = -EIO;
+ } else {
+ kaddr = kmap_atomic(page);
+ kshadow_addr = kmap_atomic(shadow_page);
+ memcpy(kaddr + offset, kshadow_addr + offset, len);
+ kunmap_atomic(kshadow_addr);
+ kunmap_atomic(kaddr);
+ }
+ __free_page(shadow_page);
+ bio_put(bio);
+
+ return ret;
+}
+
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int dirty_offset, unsigned int dirty_len,
+ bool full_page)
+{
+ int ret = 0;
+ unsigned int start, end;
+
+ if (full_page) {
+ start = 0;
+ end = PAGE_CACHE_SIZE;
+ } else {
+ start = round_down(dirty_offset, SECTOR_SIZE);
+ end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
+ }
+
+ dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+ if (!be) {
+ zero_user_segments(page, start, dirty_offset,
+ dirty_offset + dirty_len, end);
+ if (start == 0 && end == PAGE_CACHE_SIZE &&
+ trylock_page(page)) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ return ret;
+ }
+
+ if (start != dirty_offset)
+ ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
+
+ if (!ret && (dirty_offset + dirty_len < end))
+ ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
+ end - dirty_offset - dirty_len);
+
+ return ret;
+}
+
/* Given an unmapped page, zero it or read in page for COW, page is locked
* by caller.
*/
@@ -494,7 +612,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
SetPageUptodate(page);
cleanup:
- bl_put_extent(cow_read);
if (bh)
free_buffer_head(bh);
if (ret) {
@@ -556,7 +673,7 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
{
struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
@@ -566,6 +683,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages;
struct page *page;
pgoff_t index;
@@ -574,10 +692,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
- /* Check for alignment first */
- if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
- goto out_mds;
+ if (header->dreq != NULL &&
+ (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
+ !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
+ dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
+ goto out_mds;
+ }
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
@@ -674,10 +795,11 @@ next_page:
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
+ bl_put_extent(cow_read);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
- isect, NULL);
+ isect, &cow_read);
if (!be || !is_writable(be, isect)) {
header->pnfs_error = -EINVAL;
goto out;
@@ -694,7 +816,26 @@ next_page:
extent_length = be->be_length -
(isect - be->be_f_offset);
}
- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+ pg_offset = offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + count > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = count;
+
+ saved_len = pg_len;
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+ !bl_is_sector_init(be->be_inval, isect)) {
+ ret = bl_read_partial_page_sync(pages[i], cow_read,
+ pg_offset, pg_len, true);
+ if (ret) {
+ dprintk("%s bl_read_partial_page_sync fail %d\n",
+ __func__, ret);
+ header->pnfs_error = ret;
+ goto out;
+ }
+
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
@@ -703,15 +844,35 @@ next_page:
header->pnfs_error = ret;
goto out;
}
+
+ /* Expand to full page write */
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+ (pg_len & (SECTOR_SIZE - 1))){
+ /* ahh, nasty case. We have to do sync full sector
+ * read-modify-write cycles.
+ */
+ unsigned int saved_offset = pg_offset;
+ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+ pg_len, false);
+ pg_offset = round_down(pg_offset, SECTOR_SIZE);
+ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+ - pg_offset;
}
- bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+
+
+ bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
- bl_end_io_write, par);
+ bl_end_io_write, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
+ offset += saved_len;
+ count -= saved_len;
isect += PAGE_CACHE_SECTORS;
last_isect = isect;
extent_length -= PAGE_CACHE_SECTORS;
@@ -729,17 +890,16 @@ next_page:
}
write_done:
- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
- if (count < wdata->res.count) {
- wdata->res.count = count;
- }
+ wdata->res.count = wdata->args.count;
out:
bl_put_extent(be);
+ bl_put_extent(cow_read);
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
+ bl_put_extent(cow_read);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}
@@ -874,7 +1034,7 @@ static void free_blk_mountid(struct block_mount_id *mid)
}
}
-/* This is mostly copied from the filelayout's get_device_info function.
+/* This is mostly copied from the filelayout_get_device_info function.
* It seems much of this should be at the generic pnfs level.
*/
static struct pnfs_block_dev *
@@ -922,9 +1082,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dev->pgbase = 0;
dev->pglen = PAGE_SIZE * max_pages;
dev->mincount = 0;
+ dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
- rc = nfs4_proc_getdeviceinfo(server, dev);
+ rc = nfs4_proc_getdeviceinfo(server, dev, NULL);
dprintk("%s getdevice info returns %d\n", __func__, rc);
if (rc) {
rv = ERR_PTR(rc);
@@ -1011,39 +1172,110 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0;
}
+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+ return IS_ALIGNED(req->wb_offset, alignment) &&
+ IS_ALIGNED(req->wb_bytes, alignment);
+}
+
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, SECTOR_SIZE))
nfs_pageio_reset_read_mds(pgio);
else
pnfs_generic_pg_init_read(pgio, req);
}
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
+bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, SECTOR_SIZE))
+ return 0;
+
+ return pnfs_generic_pg_test(pgio, prev, req);
+}
+
+/*
+ * Return the number of contiguous bytes for a given inode
+ * starting at page frame idx.
+ */
+static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
+{
+ struct address_space *mapping = inode->i_mapping;
+ pgoff_t end;
+
+ /* Optimize common case that writes from 0 to end of file */
+ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+ if (end != NFS_I(inode)->npages) {
+ rcu_read_lock();
+ end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
+ rcu_read_unlock();
+ }
+
+ if (!end)
+ return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
+ else
+ return (end - idx) << PAGE_CACHE_SHIFT;
+}
+
static void
bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, PAGE_CACHE_SIZE)) {
nfs_pageio_reset_write_mds(pgio);
- else
- pnfs_generic_pg_init_write(pgio, req);
+ } else {
+ u64 wb_size;
+ if (pgio->pg_dreq == NULL)
+ wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
+ req->wb_index);
+ else
+ wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+
+ pnfs_generic_pg_init_write(pgio, req, wb_size);
+ }
+}
+
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
+bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, PAGE_CACHE_SIZE))
+ return 0;
+
+ return pnfs_generic_pg_test(pgio, prev, req);
}
static const struct nfs_pageio_ops bl_pg_read_ops = {
.pg_init = bl_pg_init_read,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = bl_pg_test_read,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops bl_pg_write_ops = {
.pg_init = bl_pg_init_write,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = bl_pg_test_write,
.pg_doio = pnfs_generic_pg_writepages,
};
static struct pnfs_layoutdriver_type blocklayout_type = {
.id = LAYOUT_BLOCK_VOLUME,
.name = "LAYOUT_BLOCK_VOLUME",
+ .owner = THIS_MODULE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
.alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 03350690118..9838fb02047 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,11 +36,13 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct block_mount_id {
spinlock_t bm_lock; /* protects list */
@@ -172,8 +174,7 @@ struct bl_msg_hdr {
/* blocklayoutdev.c */
ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
-struct block_device *nfs4_blkdev_get(dev_t dev);
-int nfs4_blkdev_put(struct block_device *bdev);
+void nfs4_blkdev_put(struct block_device *bdev);
struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
struct pnfs_device *dev);
int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index c96554245cc..04303b5c936 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -53,30 +53,14 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
return 0;
}
-/* Open a block_device by device number. */
-struct block_device *nfs4_blkdev_get(dev_t dev)
-{
- struct block_device *bd;
-
- dprintk("%s enter\n", __func__);
- bd = blkdev_get_by_dev(dev, FMODE_READ, NULL);
- if (IS_ERR(bd))
- goto fail;
- return bd;
-fail:
- dprintk("%s failed to open device : %ld\n",
- __func__, PTR_ERR(bd));
- return NULL;
-}
-
/*
* Release the block device
*/
-int nfs4_blkdev_put(struct block_device *bdev)
+void nfs4_blkdev_put(struct block_device *bdev)
{
dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
MINOR(bdev->bd_dev));
- return blkdev_put(bdev, FMODE_READ);
+ blkdev_put(bdev, FMODE_READ);
}
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
@@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server,
goto out;
}
- bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor));
+ bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor),
+ FMODE_READ, NULL);
if (IS_ERR(bd)) {
- rc = PTR_ERR(bd);
- dprintk("%s failed to open device : %d\n", __func__, rc);
- rv = ERR_PTR(rc);
+ dprintk("%s failed to open device : %ld\n", __func__,
+ PTR_ERR(bd));
+ rv = ERR_CAST(bd);
goto out;
}
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17..8999cfddd86 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
bl_pipe_msg.bl_wq = &nn->bl_wq;
memset(msg, 0, sizeof(*msg));
- msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+ msg->len = sizeof(bl_msg) + bl_msg.totallen;
+ msg->data = kzalloc(msg->len, GFP_NOFS);
if (!msg->data)
goto out;
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
memcpy(msg->data, &bl_msg, sizeof(bl_msg));
dataptr = (uint8_t *) msg->data;
memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
- msg->len = sizeof(bl_msg) + bl_msg.totallen;
add_wait_queue(&nn->bl_wq, &wq);
if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
@@ -88,14 +88,8 @@ out:
*/
static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
{
- int rv;
-
dprintk("%s Releasing\n", __func__);
- rv = nfs4_blkdev_put(bdev->bm_mdev);
- if (rv)
- printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
- __func__, rv);
-
+ nfs4_blkdev_put(bdev->bm_mdev);
dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
}
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 1f9a6032796..4d016144256 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
@@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
p = xdr_encode_hyper(p, 0LL);
*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
- list_del(&lce->bse_node);
- list_add_tail(&lce->bse_node, &bl->bl_committing);
+ list_move_tail(&lce->bse_node, &bl->bl_committing);
bl->bl_count--;
count++;
}