aboutsummaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c83
-rw-r--r--fs/nfs/blocklayout/blocklayout.h1
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/callback_proc.c19
-rw-r--r--fs/nfs/delegation.c11
-rw-r--r--fs/nfs/dir.c110
-rw-r--r--fs/nfs/direct.c694
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfs/file.c76
-rw-r--r--fs/nfs/filelayout/Makefile5
-rw-r--r--fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c)233
-rw-r--r--fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h)2
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c)12
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfs/inode.c257
-rw-r--r--fs/nfs/internal.h75
-rw-r--r--fs/nfs/nfs2xdr.c14
-rw-r--r--fs/nfs/nfs3acl.c330
-rw-r--r--fs/nfs/nfs3proc.c130
-rw-r--r--fs/nfs/nfs3super.c3
-rw-r--r--fs/nfs/nfs3xdr.c16
-rw-r--r--fs/nfs/nfs4_fs.h26
-rw-r--r--fs/nfs/nfs4client.c36
-rw-r--r--fs/nfs/nfs4file.c13
-rw-r--r--fs/nfs/nfs4namespace.c114
-rw-r--r--fs/nfs/nfs4proc.c391
-rw-r--r--fs/nfs/nfs4session.c25
-rw-r--r--fs/nfs/nfs4session.h2
-rw-r--r--fs/nfs/nfs4state.c39
-rw-r--r--fs/nfs/nfs4super.c16
-rw-r--r--fs/nfs/nfs4sysctl.c6
-rw-r--r--fs/nfs/nfs4trace.h8
-rw-r--r--fs/nfs/nfs4xdr.c71
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c24
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c649
-rw-r--r--fs/nfs/pnfs.c250
-rw-r--r--fs/nfs/pnfs.h48
-rw-r--r--fs/nfs/proc.c46
-rw-r--r--fs/nfs/read.c422
-rw-r--r--fs/nfs/super.c29
-rw-r--r--fs/nfs/sysctl.c6
-rw-r--r--fs/nfs/unlink.c35
-rw-r--r--fs/nfs/write.c945
47 files changed, 2860 insertions, 2456 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a66c14..4782e0840dc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
-
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e242bbf7297..9b431f44fad 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -134,8 +134,8 @@ bl_submit_bio(int rw, struct bio *bio)
if (bio) {
get_parallel(bio->bi_private);
dprintk("%s submitting %s bio %u@%llu\n", __func__,
- rw == READ ? "read" : "write",
- bio->bi_size, (unsigned long long)bio->bi_sector);
+ rw == READ ? "read" : "write", bio->bi_iter.bi_size,
+ (unsigned long long)bio->bi_iter.bi_sector);
submit_bio(rw, bio);
}
return NULL;
@@ -156,7 +156,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
}
if (bio) {
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_iter.bi_sector = isect - be->be_f_offset +
+ be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
@@ -201,19 +202,15 @@ static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
static void bl_end_io_read(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec;
+ int i;
- do {
- struct page *page = bvec->bv_page;
+ if (!err)
+ bio_for_each_segment_all(bvec, bio, i)
+ SetPageUptodate(bvec->bv_page);
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
- if (uptodate)
- SetPageUptodate(page);
- } while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
- struct nfs_read_data *rdata = par->data;
+ if (err) {
+ struct nfs_pgio_data *rdata = par->data;
struct nfs_pgio_header *header = rdata->header;
if (!header->pnfs_error)
@@ -227,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -245,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *header = rdata->header;
int i, hole;
@@ -383,21 +380,17 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
static void bl_end_io_write_zero(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
- do {
- struct page *page = bvec->bv_page;
+ struct bio_vec *bvec;
+ int i;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
+ bio_for_each_segment_all(bvec, bio, i) {
/* This is the zeroing page we added */
- end_page_writeback(page);
- page_cache_release(page);
- } while (bvec >= bio->bi_io_vec);
+ end_page_writeback(bvec->bv_page);
+ page_cache_release(bvec->bv_page);
+ }
- if (unlikely(!uptodate)) {
- struct nfs_write_data *data = par->data;
+ if (unlikely(err)) {
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!header->pnfs_error)
@@ -412,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *data = par->data;
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!uptodate) {
@@ -430,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -445,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(wdata->header->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -519,7 +512,7 @@ bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
(offset / SECTOR_SIZE);
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = bl_read_single_end_io;
@@ -680,7 +673,7 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
{
struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
@@ -1196,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
pnfs_generic_pg_init_read(pgio, req);
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, SECTOR_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -1220,7 +1217,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
if (end != NFS_I(inode)->npages) {
rcu_read_lock();
- end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
+ end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
}
@@ -1248,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, PAGE_CACHE_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 8485978993e..9838fb02047 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,6 +36,7 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed..4d016144256 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ae2e87b9545..41db5258e7a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -112,7 +112,8 @@ out:
* TODO: keep track of all layouts (and delegations) in a hash table
* hashed by filehandle.
*/
-static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct nfs_server *server;
struct inode *ino;
@@ -120,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
+ continue;
if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
- continue;
+ break;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
if (NFS_I(ino)->layout != lo) {
spin_unlock(&ino->i_lock);
iput(ino);
- continue;
+ break;
}
pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
@@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
return NULL;
}
-static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct pnfs_layout_hdr *lo;
spin_lock(&clp->cl_lock);
rcu_read_lock();
- lo = get_layout_by_fh_locked(clp, fh);
+ lo = get_layout_by_fh_locked(clp, fh, stateid);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
@@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp,
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);
- lo = get_layout_by_fh(clp, &args->cbl_fh);
+ lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
if (!lo)
- return NFS4ERR_NOMATCHING_LAYOUT;
+ goto out;
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
@@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
iput(ino);
+out:
return rv;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ef792f29f83..5d8ccecf5f5 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL)
+ goto out_enoent;
- if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
- rcu_read_unlock();
- return -ENOENT;
- }
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
+ goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
+out_enoent:
+ rcu_read_unlock();
+ return -ENOENT;
}
static struct inode *
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 812154aff98..4a3d4ef7612 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = {
static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
{
+ struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_open_dir_context *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
ctx->duped = 0;
- ctx->attr_gencount = NFS_I(dir)->attr_gencount;
+ ctx->attr_gencount = nfsi->attr_gencount;
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
ctx->cred = get_rpccred(cred);
+ spin_lock(&dir->i_lock);
+ list_add(&ctx->list, &nfsi->open_files);
+ spin_unlock(&dir->i_lock);
return ctx;
}
return ERR_PTR(-ENOMEM);
}
-static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
+static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
{
+ spin_lock(&dir->i_lock);
+ list_del(&ctx->list);
+ spin_unlock(&dir->i_lock);
put_rpccred(ctx->cred);
kfree(ctx);
}
@@ -126,7 +133,7 @@ out:
static int
nfs_closedir(struct inode *inode, struct file *filp)
{
- put_nfs_open_dir_context(filp->private_data);
+ put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
return 0;
}
@@ -274,6 +281,15 @@ out_eof:
return -EBADCOOKIE;
}
+static bool
+nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
+{
+ if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+ return false;
+ smp_rmb();
+ return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
+}
+
static
int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
{
@@ -287,8 +303,8 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
struct nfs_open_dir_context *ctx = desc->file->private_data;
new_pos = desc->current_index + i;
- if (ctx->attr_gencount != nfsi->attr_gencount
- || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) {
+ if (ctx->attr_gencount != nfsi->attr_gencount ||
+ !nfs_readdir_inode_mapping_valid(nfsi)) {
ctx->duped = 0;
ctx->attr_gencount = nfsi->attr_gencount;
} else if (new_pos < desc->ctx->pos) {
@@ -297,10 +313,9 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
if (printk_ratelimit()) {
pr_notice("NFS: directory %pD2 contains a readdir loop."
"Please contact your server vendor. "
- "The file: %s has duplicate cookie %llu\n",
- desc->file,
- array->array[i].string.name,
- *desc->dir_cookie);
+ "The file: %.*s has duplicate cookie %llu\n",
+ desc->file, array->array[i].string.len,
+ array->array[i].string.name, *desc->dir_cookie);
}
status = -ELOOP;
goto out;
@@ -428,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir)
set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
}
+/*
+ * This function is mainly for use by nfs_getattr().
+ *
+ * If this is an 'ls -l', we want to force use of readdirplus.
+ * Do this by checking if there is an active file descriptor
+ * and calling nfs_advise_use_readdirplus, then forcing a
+ * cache flush.
+ */
+void nfs_force_use_readdirplus(struct inode *dir)
+{
+ if (!list_empty(&NFS_I(dir)->open_files)) {
+ nfs_advise_use_readdirplus(dir);
+ nfs_zap_mapping(dir, dir->i_mapping);
+ }
+}
+
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
@@ -806,6 +837,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
goto out;
}
+static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
+{
+ struct nfs_inode *nfsi = NFS_I(dir);
+
+ if (nfs_attribute_cache_expired(dir))
+ return true;
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return true;
+ return false;
+}
+
/* The file offset position represents the dirent entry number. A
last cookie cache takes care of the common case of reading the
whole directory.
@@ -838,7 +880,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
nfs_block_sillyrename(dentry);
- if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
+ if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
goto out;
@@ -1404,7 +1446,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
/* Expect a negative dentry */
BUG_ON(dentry->d_inode);
- dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n",
+ dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
err = nfs_check_flags(open_flags);
@@ -1594,7 +1636,7 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
- dfprintk(VFS, "NFS: create(%s/%ld), %pd\n",
+ dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
@@ -1621,7 +1663,7 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n",
+ dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
if (!new_valid_dev(rdev))
@@ -1650,7 +1692,7 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n",
+ dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_valid = ATTR_MODE;
@@ -1678,7 +1720,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n",
+ dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
trace_nfs_rmdir_enter(dir, dentry);
@@ -1747,7 +1789,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
int error;
int need_rehash = 0;
- dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id,
+ dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
dir->i_ino, dentry);
trace_nfs_unlink_enter(dir, dentry);
@@ -1798,7 +1840,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
unsigned int pathlen = strlen(symname);
int error;
- dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id,
+ dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry, symname);
if (pathlen > PAGE_SIZE)
@@ -1821,7 +1863,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
- dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n",
+ dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
dentry, symname, error);
d_drop(dentry);
@@ -1837,6 +1879,11 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
GFP_KERNEL)) {
SetPageUptodate(page);
unlock_page(page);
+ /*
+ * add_to_page_cache_lru() grabs an extra page refcount.
+ * Drop it here to avoid leaking this page later.
+ */
+ page_cache_release(page);
} else
__free_page(page);
@@ -1897,6 +1944,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct dentry *dentry = NULL, *rehash = NULL;
+ struct rpc_task *task;
int error = -EBUSY;
dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
@@ -1944,8 +1992,16 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
- new_dir, &new_dentry->d_name);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ goto out;
+ }
+
+ error = rpc_wait_for_completion_task(task);
+ if (error == 0)
+ error = task->tk_status;
+ rpc_put_task(task);
nfs_mark_for_revalidate(old_inode);
out:
if (rehash)
@@ -1976,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
kfree(entry);
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
- smp_mb__after_atomic_dec();
+ smp_mb__after_atomic();
}
static void nfs_access_free_list(struct list_head *head)
@@ -2026,9 +2082,9 @@ nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
else {
remove_lru_entry:
list_del_init(&nfsi->access_cache_inode_lru);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
}
spin_unlock(&inode->i_lock);
}
@@ -2176,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
nfs_access_add_rbtree(inode, cache);
/* Update accounting */
- smp_mb__before_atomic_inc();
+ smp_mb__before_atomic();
atomic_long_inc(&nfs_access_nr_entries);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
/* Add inode to global LRU list */
if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
@@ -2304,7 +2360,7 @@ out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES;
- dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d71d66c9e0a..f11b9eed0de 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+ struct nfs_client *ds_clp,
+ int ds_idx)
+{
+ struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+ if (ds_clp) {
+ /* pNFS is in use, use the DS verf */
+ if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+ verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+ else
+ WARN_ON_ONCE(1);
+ }
+#endif
+ return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ WARN_ON_ONCE(verfp->committed >= 0);
+ memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+ WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ if (verfp->committed < 0) {
+ nfs_direct_set_hdr_verf(dreq, hdr);
+ return 0;
+ }
+ return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+ struct nfs_commit_data *data)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+ data->ds_commit_index);
+ WARN_ON_ONCE(verfp->committed < 0);
+ return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @rw: direction (read or write)
@@ -121,20 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
* shunt off direct read and write requests before the VFS gets them,
* so this method is only ever called for swap.
*/
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
#ifndef CONFIG_NFS_SWAP
dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
- iocb->ki_filp, (long long) pos, nr_segs);
+ iocb->ki_filp, (long long) pos, iter->nr_segs);
return -EINVAL;
#else
VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
if (rw == READ || rw == KERNEL_READ)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_read(iocb, iter, pos,
rw == READ ? true : false);
- return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_write(iocb, iter, pos,
rw == WRITE ? true : false);
#endif /* CONFIG_NFS_SWAP */
}
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
@@ -222,14 +314,31 @@ out:
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
-static void nfs_direct_complete(struct nfs_direct_req *dreq)
+static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
+ struct inode *inode = dreq->inode;
+
+ if (dreq->iocb && write) {
+ loff_t pos = dreq->iocb->ki_pos + dreq->count;
+
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < pos)
+ i_size_write(inode, pos);
+ spin_unlock(&inode->i_lock);
+ }
+
+ if (write)
+ nfs_zap_mapping(inode, inode->i_mapping);
+
+ inode_dio_done(inode);
+
if (dreq->iocb) {
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
}
+
complete_all(&dreq->completion);
nfs_direct_req_release(dreq);
@@ -237,9 +346,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
static void nfs_direct_readpage_release(struct nfs_page *req)
{
- dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
@@ -272,7 +381,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
hdr->release(hdr);
}
@@ -305,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t rsize = NFS_SERVER(inode)->rsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ struct iov_iter *iter,
+ loff_t pos)
+{
+ struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
+ ssize_t result = -EINVAL;
+ size_t requested_bytes = 0;
+ size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *),
- GFP_KERNEL);
- if (!pagevec)
- break;
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 1, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 1, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
+ nfs_pageio_init_read(&desc, dreq->inode, false,
+ &nfs_direct_read_completion_ops);
+ get_dreq(dreq);
+ desc.pg_dreq = dreq;
+ atomic_inc(&inode->i_dio_count);
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ rsize, &pgbase);
+ if (result < 0)
+ break;
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -372,54 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
}
req->wb_index = pos >> PAGE_SHIFT;
req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
nfs_release_request(req);
break;
}
pgbase = 0;
bytes -= req_len;
- started += req_len;
- user_addr += req_len;
+ requested_bytes += req_len;
pos += req_len;
- count -= req_len;
dreq->bytes_left -= req_len;
}
- /* The nfs_page now hold references to these pages */
nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
-{
- struct nfs_pageio_descriptor desc;
- ssize_t result = -EINVAL;
- size_t requested_bytes = 0;
- unsigned long seg;
-
- NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
- &nfs_direct_read_completion_ops);
- get_dreq(dreq);
- desc.pg_dreq = dreq;
-
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+ kvfree(pagevec);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
- break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -429,29 +481,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
+ inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
return 0;
}
-static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @iter: vector of user buffers into which to read data
+ * @pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file. For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change. Our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ *
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
+ ssize_t result = -EINVAL;
+ size_t count = iov_iter_count(iter);
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
+
+ dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
+ file, count, (long long) pos);
+
+ result = 0;
+ if (!count)
+ goto out;
+ mutex_lock(&inode->i_mutex);
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+
+ task_io_account_read(count);
+
+ result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
- goto out;
+ goto out_unlock;
dreq->inode = inode;
- dreq->bytes_left = iov_length(iov, nr_segs);
+ dreq->bytes_left = count;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
@@ -462,22 +554,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- NFS_I(inode)->read_io += iov_length(iov, nr_segs);
- result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
+ NFS_I(inode)->read_io += count;
+ result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
result = nfs_direct_wait(dreq);
+ if (result > 0)
+ iocb->ki_pos = pos + result;
+ }
+
+ nfs_direct_req_release(dreq);
+ return result;
+
out_release:
nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
return result;
}
-static void nfs_inode_dio_write_done(struct inode *inode)
-{
- nfs_zap_mapping(inode, inode->i_mapping);
- inode_dio_done(inode);
-}
-
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
@@ -496,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
dreq->count = 0;
get_dreq(dreq);
- NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
@@ -535,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
dprintk("NFS: %5u commit failed with error %d.\n",
data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
@@ -593,8 +691,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_inode_dio_write_done(dreq->inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
}
@@ -610,114 +707,10 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_inode_dio_write_done(inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
#endif
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation. If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes. Write length accounting is
- * handled automatically by nfs_direct_write_result(). Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t wsize = NFS_SERVER(inode)->wsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
-
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
- if (!pagevec)
- break;
-
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 0, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 0, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
-
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
-
- for (i = 0; i < npages; i++) {
- struct nfs_page *req;
- unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
- pgbase, req_len);
- if (IS_ERR(req)) {
- result = PTR_ERR(req);
- break;
- }
- nfs_lock_request(req);
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
- nfs_unlock_and_release_request(req);
- break;
- }
- pgbase = 0;
- bytes -= req_len;
- started += req_len;
- user_addr += req_len;
- pos += req_len;
- count -= req_len;
- dreq->bytes_left -= req_len;
- }
- /* The nfs_page now hold references to these pages */
- nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
@@ -747,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
bit = NFS_IOHDR_NEED_RESCHED;
else if (dreq->flags == 0) {
- memcpy(&dreq->verf, hdr->verf,
- sizeof(dreq->verf));
+ nfs_direct_set_hdr_verf(dreq, hdr);
bit = NFS_IOHDR_NEED_COMMIT;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ dreq->flags =
+ NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else
bit = NFS_IOHDR_NEED_COMMIT;
@@ -763,6 +756,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
+
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
switch (bit) {
@@ -797,33 +791,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.completion = nfs_direct_write_completion,
};
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation. If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes. Write length accounting is
+ * handled automatically by nfs_direct_write_result(). Otherwise, if
+ * no requests have been sent, just return an error.
+ */
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
+ struct iov_iter *iter,
+ loff_t pos)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
ssize_t result = 0;
size_t requested_bytes = 0;
- unsigned long seg;
+ size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
- NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+ nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
atomic_inc(&inode->i_dio_count);
- NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+ NFS_I(inode)->write_io += iov_iter_count(iter);
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ wsize, &pgbase);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ requested_bytes += req_len;
+ pos += req_len;
+ dreq->bytes_left -= req_len;
+ }
+ nfs_direct_release_pages(pagevec, npages);
+ kvfree(pagevec);
+ if (result < 0)
break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -842,98 +880,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
return 0;
}
-static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos,
- size_t count, bool uio)
-{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct nfs_direct_req *dreq;
- struct nfs_lock_context *l_ctx;
-
- dreq = nfs_direct_req_alloc();
- if (!dreq)
- goto out;
-
- dreq->inode = inode;
- dreq->bytes_left = count;
- dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- l_ctx = nfs_get_lock_context(dreq->ctx);
- if (IS_ERR(l_ctx)) {
- result = PTR_ERR(l_ctx);
- goto out_release;
- }
- dreq->l_ctx = l_ctx;
- if (!is_sync_kiocb(iocb))
- dreq->iocb = iocb;
-
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
- result = nfs_direct_wait(dreq);
-out_release:
- nfs_direct_req_release(dreq);
-out:
- return result;
-}
-
-/**
- * nfs_file_direct_read - file direct read operation for NFS files
- * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
- * @pos: byte offset in file where reading starts
- *
- * We use this function for direct reads instead of calling
- * generic_file_aio_read() in order to avoid gfar's check to see if
- * the request starts before the end of the file. For that check
- * to work, we must generate a GETATTR before each direct read, and
- * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. Our preference is simply
- * to do all reads the application wants, and the server will take
- * care of managing the end of file boundary.
- *
- * This function also eliminates unnecessarily updating the file's
- * atime locally, as the NFS server sets the file's atime, and this
- * client must read the updated atime from the server back into its
- * cache.
- */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
-{
- ssize_t retval = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- size_t count;
-
- count = iov_length(iov, nr_segs);
- nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
-
- dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
- file, count, (long long) pos);
-
- retval = 0;
- if (!count)
- goto out;
-
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
-
- task_io_account_read(count);
-
- retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
- if (retval > 0)
- iocb->ki_pos = pos + retval;
-
-out:
- return retval;
-}
-
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -951,49 +901,97 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t retval = -EINVAL;
+ ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count;
+ struct inode *inode = mapping->host;
+ struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
+ loff_t end;
+ size_t count = iov_iter_count(iter);
+ end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
- count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, count, (long long) pos);
- retval = generic_write_checks(file, &pos, &count, 0);
- if (retval)
+ result = generic_write_checks(file, &pos, &count, 0);
+ if (result)
goto out;
- retval = -EINVAL;
+ result = -EINVAL;
if ((ssize_t) count < 0)
goto out;
- retval = 0;
+ result = 0;
if (!count)
goto out;
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
+ mutex_lock(&inode->i_mutex);
+
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+
+ if (mapping->nrpages) {
+ result = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
+ if (result)
+ goto out_unlock;
+ }
task_io_account_write(count);
- retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
- if (retval > 0) {
- struct inode *inode = mapping->host;
+ result = -ENOMEM;
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ goto out_unlock;
- iocb->ki_pos = pos + retval;
- spin_lock(&inode->i_lock);
- if (i_size_read(inode) < iocb->ki_pos)
- i_size_write(inode, iocb->ki_pos);
- spin_unlock(&inode->i_lock);
+ dreq->inode = inode;
+ dreq->bytes_left = count;
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
+ goto out_release;
+ }
+ dreq->l_ctx = l_ctx;
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+
+ if (mapping->nrpages) {
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
}
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
+ result = nfs_direct_wait(dreq);
+ if (result > 0) {
+ struct inode *inode = mapping->host;
+
+ iocb->ki_pos = pos + result;
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
+ }
+ nfs_direct_req_release(dreq);
+ return result;
+
+out_release:
+ nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
- return retval;
+ return result;
}
/**
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index fc0f95ec735..d25f10fb492 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e2fcacf07de..4042ff58fe3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id)
EXPORT_SYMBOL_GPL(nfs_file_flush);
ssize_t
-nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+ return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
- dprintk("NFS: read(%pD2, %lu@%lu)\n",
+ dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
- (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+ iov_iter_count(to), (unsigned long) iocb->ki_pos);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
- result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ result = generic_file_read_iter(iocb, to);
if (result > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
}
@@ -354,7 +353,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
int once_thru = 0;
- dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n",
+ dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
start:
@@ -395,7 +394,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
- dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n",
+ dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
/*
@@ -585,7 +584,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
int ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
- dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n",
+ dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
(long long)page_offset(page));
@@ -617,6 +616,7 @@ out:
static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
@@ -634,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
return 0;
}
-ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
unsigned long written = 0;
ssize_t result;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(from);
+ loff_t pos = iocb->ki_pos;
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
if (file->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+ return nfs_file_direct_write(iocb, from, pos, true);
- dprintk("NFS: write(%pD2, %lu@%Ld)\n",
- file, (unsigned long) count, (long long) pos);
+ dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+ file, count, (long long) pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -669,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
if (!count)
goto out;
- result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ result = generic_file_write_iter(iocb, from);
if (result > 0)
written = result;
@@ -690,36 +690,6 @@ out_swapfile:
}
EXPORT_SYMBOL_GPL(nfs_file_write);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
- struct file *filp, loff_t *ppos,
- size_t count, unsigned int flags)
-{
- struct inode *inode = file_inode(filp);
- unsigned long written = 0;
- ssize_t ret;
-
- dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
- filp, (unsigned long) count, (unsigned long long) *ppos);
-
- /*
- * The combination of splice and an O_APPEND destination is disallowed.
- */
-
- ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
- if (ret > 0)
- written = ret;
-
- if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
- int err = vfs_fsync(filp, 0);
- if (err < 0)
- ret = err;
- }
- if (ret > 0)
- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_write);
-
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
@@ -915,10 +885,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
- fl->fl_owner = (fl_owner_t)filp;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
-
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
@@ -938,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = nfs_file_read,
+ .write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -950,7 +916,7 @@ const struct file_operations nfs_file_operations = {
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
- .splice_write = nfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
};
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 00000000000..8516cdffb9e
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index b86464ba25e..d2eba1c13b7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,11 +35,11 @@
#include <linux/sunrpc/metrics.h>
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
-#include "nfs4trace.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -84,17 +84,17 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
- "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
- (long long)NFS_FILEID(hdr->inode),
+ (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
@@ -105,17 +105,17 @@ static void filelayout_reset_write(struct nfs_write_data *data)
}
}
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
- "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
- (long long)NFS_FILEID(hdr->inode),
+ (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
@@ -243,7 +243,7 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used.
*/
static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
return;
pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
rpc_exit(task, -EIO);
@@ -317,26 +317,29 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
rpc_exit(task, 0);
return;
}
- rdata->read_done_cb = filelayout_read_done_cb;
+ rdata->pgio_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args,
&rdata->res.seq_res,
task))
return;
- nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
- rdata->args.lock_context, FMODE_READ);
+ if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
+ rdata->args.lock_context, FMODE_READ) == -EIO)
+ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ nfs41_sequence_done(task, &rdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
rdata->header->mds_ops->rpc_call_done(task, data);
@@ -344,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -360,7 +363,7 @@ static void filelayout_read_release(void *data)
}
static int filelayout_write_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
@@ -416,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
rpc_exit(task, -EIO);
@@ -433,17 +436,20 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
&wdata->res.seq_res,
task))
return;
- nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
- wdata->args.lock_context, FMODE_WRITE);
+ if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
+ wdata->args.lock_context, FMODE_WRITE) == -EIO)
+ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ nfs41_sequence_done(task, &wdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
wdata->header->mds_ops->rpc_call_done(task, data);
@@ -451,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -523,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};
static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -554,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
/* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
@@ -562,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
- nfs_initiate_read(ds_clnt, data,
- &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+ nfs_initiate_pgio(ds_clnt, data,
+ &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -594,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
- data->write_done_cb = filelayout_write_done_cb;
+ data->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
- /*
- * Get the file offset on the dserver. Set the write offset to
- * this offset and save the original offset.
- */
+
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- nfs_initiate_write(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, data,
&filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
@@ -631,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_deviceid_node *d;
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
- struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
dprintk("--> %s\n", __func__);
@@ -649,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
- if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+ if (!fl->stripe_unit) {
dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit);
goto out;
@@ -686,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out_put;
}
- if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
- dprintk("%s Stripe unit (%u) not aligned with rsize %u "
- "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
- nfss->wsize);
- }
-
status = 0;
out:
dprintk("--> %s returns %d\n", __func__, status);
@@ -844,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
struct pnfs_commit_bucket *buckets;
- int size;
+ int size, i;
if (fl->commit_through_mds)
return 0;
- if (cinfo->ds->nbuckets != 0) {
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ if (cinfo->ds->nbuckets >= size) {
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of <multipath_list4, fh> keys, each
@@ -858,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
return 0;
}
- size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
gfp_flags);
if (!buckets)
return -ENOMEM;
- else {
- int i;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ /* mark direct verifier as unset */
+ buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ }
- spin_lock(cinfo->lock);
- if (cinfo->ds->nbuckets != 0)
- kfree(buckets);
- else {
- cinfo->ds->buckets = buckets;
- cinfo->ds->nbuckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&buckets[i].written);
- INIT_LIST_HEAD(&buckets[i].committing);
- }
- }
- spin_unlock(cinfo->lock);
- return 0;
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets >= size)
+ goto out;
+ for (i = 0; i < cinfo->ds->nbuckets; i++) {
+ list_splice(&cinfo->ds->buckets[i].written,
+ &buckets[i].written);
+ list_splice(&cinfo->ds->buckets[i].committing,
+ &buckets[i].committing);
+ buckets[i].direct_verf.committed =
+ cinfo->ds->buckets[i].direct_verf.committed;
+ buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+ buckets[i].clseg = cinfo->ds->buckets[i].clseg;
}
+ swap(cinfo->ds->buckets, buckets);
+ cinfo->ds->nbuckets = size;
+out:
+ spin_unlock(cinfo->lock);
+ kfree(buckets);
+ return 0;
}
static struct pnfs_layout_segment *
@@ -909,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
- * return true : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
-static bool
+static size_t
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
+ unsigned int size;
u64 p_stripe, r_stripe;
- u32 stripe_unit;
+ u32 stripe_offset;
+ u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+ u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
- if (!pnfs_generic_pg_test(pgio, prev, req) ||
- !nfs_generic_pg_test(pgio, prev, req))
- return false;
+ /* calls nfs_generic_pg_test */
+ size = pnfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
- p_stripe = (u64)req_offset(prev);
- r_stripe = (u64)req_offset(req);
- stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+ /* see if req and prev are in the same stripe */
+ if (prev) {
+ p_stripe = (u64)req_offset(prev) - segment_offset;
+ r_stripe = (u64)req_offset(req) - segment_offset;
+ do_div(p_stripe, stripe_unit);
+ do_div(r_stripe, stripe_unit);
- do_div(p_stripe, stripe_unit);
- do_div(r_stripe, stripe_unit);
+ if (p_stripe != r_stripe)
+ return 0;
+ }
- return (p_stripe == r_stripe);
+ /* calculate remaining bytes in the current stripe */
+ div_u64_rem((u64)req_offset(req) - segment_offset,
+ stripe_unit,
+ &stripe_offset);
+ WARN_ON_ONCE(stripe_offset > stripe_unit);
+ if (stripe_offset >= stripe_unit)
+ return 0;
+ return min(stripe_unit - (unsigned int)stripe_offset, size);
}
static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase) {
- /*
- * Handling unaligned pages is difficult, because have to
- * somehow split a req in two in certain cases in the
- * pg.test code. Avoid this by just not using pnfs
- * in this case.
- */
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -967,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase)
- goto out_mds;
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -1061,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
*/
j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
+ spin_lock(cinfo->lock);
buckets = cinfo->ds->buckets;
list = &buckets[i].written;
if (list_empty(list)) {
@@ -1074,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
+ spin_unlock(cinfo->lock);
return list;
}
@@ -1170,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
return ret;
}
+/* Note called with cinfo->lock held. */
static int
filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct nfs_commit_info *cinfo,
@@ -1214,19 +1226,22 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo)
{
struct pnfs_commit_bucket *b;
+ struct pnfs_layout_segment *freeme;
int i;
- /* NOTE cinfo->lock is NOT held, relying on fact that this is
- * only called on single thread per dreq.
- * Can't take the lock because need to do pnfs_put_lseg
- */
+restart:
+ spin_lock(cinfo->lock);
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
- pnfs_put_lseg(b->wlseg);
+ freeme = b->wlseg;
b->wlseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
+ goto restart;
}
}
cinfo->ds->nwritten = 0;
+ spin_unlock(cinfo->lock);
}
static unsigned int
@@ -1237,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;
+ struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
@@ -1247,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (!data)
break;
data->ds_commit_index = i;
+ spin_lock(cinfo->lock);
data->lseg = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
list_add(&data->pages, list);
nreq++;
}
@@ -1258,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (list_empty(&bucket->committing))
continue;
nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- pnfs_put_lseg(bucket->clseg);
+ spin_lock(cinfo->lock);
+ freeme = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
}
/* Caller will clean up entries put on list */
return nreq;
@@ -1324,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return &flo->generic_hdr;
+ return flo != NULL ? &flo->generic_hdr : NULL;
}
static void
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index cebd20e7e92..ffbddf2219e 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,7 +30,7 @@
#ifndef FS_NFS_NFS4FILELAYOUT_H
#define FS_NFS_NFS4FILELAYOUT_H
-#include "pnfs.h"
+#include "../pnfs.h"
/*
* Default data server connection timeout and retrans vaules.
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index c7c295e556e..44bf0140a4c 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -33,9 +33,9 @@
#include <linux/module.h>
#include <linux/sunrpc/addr.h>
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -95,7 +95,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
b6 = (struct sockaddr_in6 *)addr2;
/* LINKLOCAL addresses must have matching scope_id */
- if (ipv6_addr_scope(&a6->sin6_addr) ==
+ if (ipv6_addr_src_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
return false;
@@ -789,9 +789,9 @@ static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9aafa..b94f80420a5 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
security_d_instantiate(ret, inode);
spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+ if (IS_ROOT(ret) && !ret->d_fsdata &&
+ !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
ret->d_fsdata = name;
name = NULL;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 18ab2da4eeb..9927913c97c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL_GPL(nfs_clear_inode);
void nfs_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
nfs_clear_inode(inode);
}
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
return ret;
}
+static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= flags;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
+}
+
/*
* Invalidate the local caches
*/
@@ -162,19 +173,17 @@ static void nfs_zap_caches_locked(struct inode *inode)
memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
- nfs_fscache_invalidate(inode);
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_LABEL
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_DATA
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
- | NFS_INO_REVAL_PAGECACHE;
+ | NFS_INO_REVAL_PAGECACHE);
} else
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_LABEL
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
- | NFS_INO_REVAL_PAGECACHE;
+ | NFS_INO_REVAL_PAGECACHE);
+ nfs_zap_label_cache_locked(nfsi);
}
void nfs_zap_caches(struct inode *inode)
@@ -188,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
{
if (mapping->nrpages != 0) {
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
spin_unlock(&inode->i_lock);
}
}
@@ -210,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -266,6 +274,13 @@ nfs_init_locked(struct inode *inode, void *opaque)
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+static void nfs_clear_label_invalid(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL;
+ spin_unlock(&inode->i_lock);
+}
+
void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
@@ -283,6 +298,7 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
__func__,
(char *)label->label,
label->len, error);
+ nfs_clear_label_invalid(inode);
}
}
@@ -312,7 +328,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
}
EXPORT_SYMBOL_GPL(nfs4_label_alloc);
#else
-void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
}
@@ -362,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_mode = fattr->mode;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -408,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_REVAL_PAGECACHE;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -458,9 +474,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
- dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
+ dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
+ (unsigned long long)NFS_FILEID(inode),
nfs_display_fhandle_hash(fh),
atomic_read(&inode->i_count));
@@ -543,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
spin_lock(&inode->i_lock);
i_size_write(inode, offset);
+ /* Optimisation */
+ if (offset == 0)
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
@@ -571,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode->i_uid = attr->ia_uid;
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL);
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -581,6 +601,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
+static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ parent = dget_parent(dentry);
+ nfs_force_use_readdirplus(parent->d_inode);
+ dput(parent);
+}
+
+static bool nfs_need_revalidate_inode(struct inode *inode)
+{
+ if (NFS_I(inode)->cache_validity &
+ (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
+ return true;
+ if (nfs_attribute_cache_expired(inode))
+ return true;
+ return false;
+}
+
int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
@@ -609,10 +648,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
need_atime = 0;
- if (need_atime)
- err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- else
- err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (need_atime || nfs_need_revalidate_inode(inode)) {
+ struct nfs_server *server = NFS_SERVER(inode);
+
+ if (server->caps & NFS_CAP_READDIRPLUS)
+ nfs_request_parent_use_readdirplus(dentry);
+ err = __nfs_revalidate_inode(server, inode);
+ }
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
@@ -870,8 +912,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
struct nfs_fattr *fattr = NULL;
struct nfs_inode *nfsi = NFS_I(inode);
- dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
- inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+ dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n",
+ inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode));
trace_nfs_revalidate_inode_enter(inode);
@@ -895,9 +937,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label);
if (status != 0) {
- dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode), status);
+ (unsigned long long)NFS_FILEID(inode), status);
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
@@ -908,9 +950,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
status = nfs_refresh_inode(inode, fattr);
if (status) {
- dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode), status);
+ (unsigned long long)NFS_FILEID(inode), status);
goto err_out;
}
@@ -919,9 +961,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
nfs_setsecurity(inode, fattr, label);
- dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
+ dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode));
+ (unsigned long long)NFS_FILEID(inode));
err_out:
nfs4_label_free(label);
@@ -954,9 +996,7 @@ int nfs_attribute_cache_expired(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
- if (!(NFS_I(inode)->cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
- && !nfs_attribute_cache_expired(inode))
+ if (!nfs_need_revalidate_inode(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
return __nfs_revalidate_inode(server, inode);
}
@@ -977,16 +1017,17 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (ret < 0)
return ret;
}
- spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode)) {
+ spin_lock(&inode->i_lock);
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode->i_lock);
+ }
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
- dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
- inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+ dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n",
+ inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(inode));
return 0;
}
@@ -1007,6 +1048,7 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
int ret = 0;
/* swapfiles are not supposed to be shared. */
@@ -1018,12 +1060,46 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
if (ret < 0)
goto out;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
- trace_nfs_invalidate_mapping_enter(inode);
- ret = nfs_invalidate_mapping(inode, mapping);
- trace_nfs_invalidate_mapping_exit(inode, ret);
+
+ /*
+ * We must clear NFS_INO_INVALID_DATA first to ensure that
+ * invalidations that come in while we're shooting down the mappings
+ * are respected. But, that leaves a race window where one revalidator
+ * can clear the flag, and then another checks it before the mapping
+ * gets invalidated. Fix that by serializing access to this part of
+ * the function.
+ *
+ * At the same time, we need to allow other tasks to see whether we
+ * might be in the middle of invalidating the pages, so we only set
+ * the bit lock here if it looks like we're going to be doing that.
+ */
+ for (;;) {
+ ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+ if (ret)
+ goto out;
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ break;
+ spin_unlock(&inode->i_lock);
+ goto out;
}
+ set_bit(NFS_INO_INVALIDATING, bitlock);
+ smp_wmb();
+ nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+ spin_unlock(&inode->i_lock);
+ trace_nfs_invalidate_mapping_enter(inode);
+ ret = nfs_invalidate_mapping(inode, mapping);
+ trace_nfs_invalidate_mapping_exit(inode, ret);
+
+ clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
+ smp_mb__after_atomic();
+ wake_up_bit(bitlock, NFS_INO_INVALIDATING);
out:
return ret;
}
@@ -1038,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& inode->i_version == fattr->pre_change_attr) {
inode->i_version = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
/* If we have atomic WCC data, we may update some attributes */
@@ -1054,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -1065,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
ret |= NFS_INO_INVALID_ATTR;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
-
return ret;
}
@@ -1126,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
- nfsi->cache_validity |= invalid;
+ nfs_set_cache_invalid(inode, invalid);
nfsi->read_cache_jiffies = fattr->time_start;
return 0;
@@ -1282,12 +1355,28 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
+/*
+ * Don't trust the change_attribute, mtime, ctime or size if
+ * a pnfs LAYOUTCOMMIT is outstanding
+ */
+static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode,
+ struct nfs_fattr *fattr)
+{
+ if (pnfs_layoutcommit_outstanding(inode))
+ fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE |
+ NFS_ATTR_FATTR_MTIME |
+ NFS_ATTR_FATTR_CTIME |
+ NFS_ATTR_FATTR_SIZE);
+}
+
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
int ret;
trace_nfs_refresh_inode_enter(inode);
+ nfs_inode_attrs_handle_layoutcommit(inode, fattr);
+
if (nfs_inode_attrs_need_update(inode, fattr))
ret = nfs_update_inode(inode, fattr);
else
@@ -1323,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- if (S_ISDIR(inode->i_mode)) {
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
- }
+ if (S_ISDIR(inode->i_mode))
+ invalid |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, invalid);
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
return nfs_refresh_inode_locked(inode, fattr);
@@ -1434,7 +1521,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long now = jiffies;
unsigned long save_cache_validity;
- dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
+ dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
@@ -1455,7 +1542,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/*
* Big trouble! The inode has become a different object.
*/
- printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
+ printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n",
__func__, inode->i_ino, inode->i_mode, fattr->mode);
goto out_err;
}
@@ -1496,18 +1583,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
- invalid |= save_cache_validity;
+ nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1517,10 +1606,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
- if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
- new_isize > cur_isize) {
+ if ((nfsi->npages == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ invalid &= ~NFS_INO_REVAL_PAGECACHE;
}
dprintk("NFS: isize change on server for file %s/%ld "
"(%Ld to %Ld)\n",
@@ -1530,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)new_isize);
}
} else
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
@@ -1538,7 +1628,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1549,7 +1640,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
}
} else if (server->caps & NFS_CAP_MODE)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1560,7 +1652,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_uid = fattr->uid;
}
} else if (server->caps & NFS_CAP_OWNER)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1571,7 +1664,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
}
} else if (server->caps & NFS_CAP_OWNER_GROUP)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1584,7 +1678,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1597,7 +1692,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_blocks = fattr->du.nfs2.blocks;
/* Update attrtimeo value if we're out of the unstable period */
- if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) {
+ if (invalid & NFS_INO_INVALID_ATTR) {
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
@@ -1610,17 +1705,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
}
}
invalid &= ~NFS_INO_INVALID_ATTR;
- invalid &= ~NFS_INO_INVALID_LABEL;
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
invalid &= ~NFS_INO_INVALID_DATA;
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
(save_cache_validity & NFS_INO_REVAL_FORCED))
- nfsi->cache_validity |= invalid;
-
- if (invalid & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, invalid);
return 0;
out_err:
@@ -1641,10 +1732,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return NULL;
nfsi->flags = 0UL;
nfsi->cache_validity = 0UL;
-#ifdef CONFIG_NFS_V3_ACL
- nfsi->acl_access = ERR_PTR(-EAGAIN);
- nfsi->acl_default = ERR_PTR(-EAGAIN);
-#endif
#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bca6a3e3c49..f415cbf9f6c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -176,7 +176,8 @@ extern struct nfs_server *nfs4_create_server(
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen);
+ struct sockaddr *sap, size_t salen,
+ struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
@@ -230,13 +231,21 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+ const struct rpc_call_ops *, int, int);
+void nfs_free_request(struct nfs_page *req);
+
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
c->flags = 0;
@@ -269,6 +278,30 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
+static inline void nfs4_label_free(struct nfs4_label *label)
+{
+ if (label) {
+ kfree(label->label);
+ kfree(label);
+ }
+ return;
+}
+
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+ if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL))
+ nfsi->cache_validity |= NFS_INO_INVALID_LABEL;
+}
+#else
+static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
+static inline void nfs4_label_free(void *label) {}
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
@@ -276,6 +309,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const char *ip_addr);
/* dir.c */
+extern void nfs_force_use_readdirplus(struct inode *dir);
extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
@@ -294,16 +328,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
loff_t nfs_file_llseek(struct file *, loff_t, int);
int nfs_file_flush(struct file *, fl_owner_t);
-ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
-ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
int nfs_lock(struct file *, int, struct file_lock *);
int nfs_flock(struct file *, int, struct file_lock *);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
- size_t, unsigned int);
int nfs_check_flags(int);
int nfs_setlease(struct file *, long, struct file_lock **);
@@ -369,19 +401,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
struct nfs_pgio_completion_ops;
/* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* super.c */
void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -396,19 +420,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -421,6 +436,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
struct nfs_commit_info *cinfo);
int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max);
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
int nfs_scan_commit(struct inode *inode, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_mark_request_commit(struct nfs_page *req,
@@ -449,6 +465,13 @@ extern int nfs_migrate_page(struct address_space *,
#define nfs_migrate_page NULL
#endif
+/* unlink.c */
+extern struct rpc_task *
+nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
+ struct dentry *old_dentry, struct dentry *new_dentry,
+ void (*complete)(struct rpc_task *, struct nfs_renamedata *));
+extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
@@ -459,7 +482,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339e..5f61b83f4a1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
/*
* typedef opaque nfsdata<>;
*/
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
{
u32 recvd, count;
__be32 *p;
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
* };
*/
static void encode_readargs(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_readargs(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
* };
*/
static void encode_writeargs(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_writeargs(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -857,7 +857,7 @@ out_default:
* };
*/
static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -878,7 +878,7 @@ out_default:
}
static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
/* All NFSv2 writes are "file sync" writes */
result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 4a1aafba6a2..8f854dde415 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -10,179 +10,7 @@
#define NFSDBG_FACILITY NFSDBG_PROC
-ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int pos=0, len=0;
-
-# define output(s) do { \
- if (pos + sizeof(s) <= size) { \
- memcpy(buffer + pos, s, sizeof(s)); \
- pos += sizeof(s); \
- } \
- len += sizeof(s); \
- } while(0)
-
- acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- output("system.posix_acl_access");
- posix_acl_release(acl);
- }
-
- if (S_ISDIR(inode->i_mode)) {
- acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- output("system.posix_acl_default");
- posix_acl_release(acl);
- }
- }
-
-# undef output
-
- if (!buffer || len <= size)
- return len;
- return -ERANGE;
-}
-
-ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int type, error = 0;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- acl = nfs3_proc_getacl(inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- else if (acl) {
- if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
- error = -ENODATA;
- else
- error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- } else
- error = -ENODATA;
-
- return error;
-}
-
-int nfs3_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int type, error;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- error = nfs3_proc_setacl(inode, type, acl);
- posix_acl_release(acl);
-
- return error;
-}
-
-int nfs3_removexattr(struct dentry *dentry, const char *name)
-{
- struct inode *inode = dentry->d_inode;
- int type;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- return nfs3_proc_setacl(inode, type, NULL);
-}
-
-static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
-{
- if (!IS_ERR(nfsi->acl_access)) {
- posix_acl_release(nfsi->acl_access);
- nfsi->acl_access = ERR_PTR(-EAGAIN);
- }
- if (!IS_ERR(nfsi->acl_default)) {
- posix_acl_release(nfsi->acl_default);
- nfsi->acl_default = ERR_PTR(-EAGAIN);
- }
-}
-
-void nfs3_forget_cached_acls(struct inode *inode)
-{
- dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id,
- inode->i_ino);
- spin_lock(&inode->i_lock);
- __nfs3_forget_cached_acls(NFS_I(inode));
- spin_unlock(&inode->i_lock);
-}
-
-static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct posix_acl *acl = ERR_PTR(-EINVAL);
-
- spin_lock(&inode->i_lock);
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = nfsi->acl_access;
- break;
-
- case ACL_TYPE_DEFAULT:
- acl = nfsi->acl_default;
- break;
-
- default:
- goto out;
- }
- if (IS_ERR(acl))
- acl = ERR_PTR(-EAGAIN);
- else
- acl = posix_acl_dup(acl);
-out:
- spin_unlock(&inode->i_lock);
- dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
- inode->i_ino, type, acl);
- return acl;
-}
-
-static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
- struct posix_acl *dfacl)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
-
- dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id,
- inode->i_ino, acl, dfacl);
- spin_lock(&inode->i_lock);
- __nfs3_forget_cached_acls(NFS_I(inode));
- if (!IS_ERR(acl))
- nfsi->acl_access = posix_acl_dup(acl);
- if (!IS_ERR(dfacl))
- nfsi->acl_default = posix_acl_dup(dfacl);
- spin_unlock(&inode->i_lock);
-}
-
-struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct page *pages[NFSACL_MAXPAGES] = { };
@@ -198,7 +26,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
.rpc_argp = &args,
.rpc_resp = &res,
};
- struct posix_acl *acl;
int status, count;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -207,10 +34,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
status = nfs_revalidate_inode(server, inode);
if (status < 0)
return ERR_PTR(status);
- acl = nfs3_get_cached_acl(inode, type);
- if (acl != ERR_PTR(-EAGAIN))
- return acl;
- acl = NULL;
/*
* Only get the access acl when explicitly requested: We don't
@@ -257,40 +80,41 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
}
if (res.acl_access != NULL) {
- if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) {
+ if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) ||
+ res.acl_access->a_count == 0) {
posix_acl_release(res.acl_access);
res.acl_access = NULL;
}
}
- nfs3_cache_acls(inode,
- (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL),
- (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL));
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = res.acl_access;
- res.acl_access = NULL;
- break;
+ if (res.mask & NFS_ACL)
+ set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access);
+ else
+ forget_cached_acl(inode, ACL_TYPE_ACCESS);
- case ACL_TYPE_DEFAULT:
- acl = res.acl_default;
- res.acl_default = NULL;
+ if (res.mask & NFS_DFACL)
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default);
+ else
+ forget_cached_acl(inode, ACL_TYPE_DEFAULT);
+
+ nfs_free_fattr(res.fattr);
+ if (type == ACL_TYPE_ACCESS) {
+ posix_acl_release(res.acl_default);
+ return res.acl_access;
+ } else {
+ posix_acl_release(res.acl_access);
+ return res.acl_default;
}
getout:
posix_acl_release(res.acl_access);
posix_acl_release(res.acl_default);
nfs_free_fattr(res.fattr);
-
- if (status != 0) {
- posix_acl_release(acl);
- acl = ERR_PTR(status);
- }
- return acl;
+ return ERR_PTR(status);
}
-static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
- struct posix_acl *dfacl)
+static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+ struct posix_acl *dfacl)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_fattr *fattr;
@@ -353,7 +177,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
switch (status) {
case 0:
status = nfs_refresh_inode(inode, fattr);
- nfs3_cache_acls(inode, acl, dfacl);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
@@ -373,40 +198,43 @@ out:
return status;
}
-int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
+int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+ struct posix_acl *dfacl)
+{
+ int ret;
+ ret = __nfs3_proc_setacls(inode, acl, dfacl);
+ return (ret == -EOPNOTSUPP) ? 0 : ret;
+
+}
+
+int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct posix_acl *alloc = NULL, *dfacl = NULL;
int status;
if (S_ISDIR(inode->i_mode)) {
switch(type) {
- case ACL_TYPE_ACCESS:
- alloc = dfacl = nfs3_proc_getacl(inode,
- ACL_TYPE_DEFAULT);
- if (IS_ERR(alloc))
- goto fail;
- break;
-
- case ACL_TYPE_DEFAULT:
- dfacl = acl;
- alloc = acl = nfs3_proc_getacl(inode,
- ACL_TYPE_ACCESS);
- if (IS_ERR(alloc))
- goto fail;
- break;
-
- default:
- return -EINVAL;
+ case ACL_TYPE_ACCESS:
+ alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ if (IS_ERR(alloc))
+ goto fail;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ dfacl = acl;
+ alloc = acl = get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(alloc))
+ goto fail;
+ break;
}
- } else if (type != ACL_TYPE_ACCESS)
- return -EINVAL;
+ }
if (acl == NULL) {
alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(alloc))
goto fail;
}
- status = nfs3_proc_setacls(inode, acl, dfacl);
+ status = __nfs3_proc_setacls(inode, acl, dfacl);
posix_acl_release(alloc);
return status;
@@ -414,27 +242,51 @@ fail:
return PTR_ERR(alloc);
}
-int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
- umode_t mode)
+const struct xattr_handler *nfs3_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ NULL,
+};
+
+static int
+nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
+ size_t size, ssize_t *result)
{
- struct posix_acl *dfacl, *acl;
- int error = 0;
+ struct posix_acl *acl;
+ char *p = data + *result;
- dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(dfacl)) {
- error = PTR_ERR(dfacl);
- return (error == -EOPNOTSUPP) ? 0 : error;
- }
- if (!dfacl)
+ acl = get_acl(inode, type);
+ if (!acl)
return 0;
- acl = posix_acl_dup(dfacl);
- error = posix_acl_create(&acl, GFP_KERNEL, &mode);
- if (error < 0)
- goto out_release_dfacl;
- error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ?
- dfacl : NULL);
+
posix_acl_release(acl);
-out_release_dfacl:
- posix_acl_release(dfacl);
- return error;
+
+ *result += strlen(name);
+ *result += 1;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+
+ssize_t
+nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ ssize_t result = 0;
+ int error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
+ POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ if (error)
+ return error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
+ POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ if (error)
+ return error;
+ return result;
}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 01b6f6a49d1..f0afa291fd5 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -18,6 +18,7 @@
#include <linux/lockd/bind.h>
#include <linux/nfs_mount.h>
#include <linux/freezer.h>
+#include <linux/xattr.h>
#include "iostat.h"
#include "internal.h"
@@ -317,8 +318,8 @@ static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
dprintk("NFS call create %pd\n", dentry);
@@ -340,7 +341,9 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
data->arg.create.verifier[1] = cpu_to_be32(current->pid);
}
- sattr->ia_mode &= ~current_umask();
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
for (;;) {
status = nfs3_do_create(dir, dentry, data);
@@ -366,7 +369,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
if (status != 0)
- goto out;
+ goto out_release_acls;
/* When we created the file with exclusive semantics, make
* sure we set the attributes afterwards. */
@@ -385,9 +388,14 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
dprintk("NFS reply setattr (post-create): %d\n", status);
if (status != 0)
- goto out;
+ goto out_release_acls;
}
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply create: %d\n", status);
@@ -471,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
}
static int
-nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .old_name = old_name,
- .new_dir = NFS_FH(new_dir),
- .new_name = new_name,
- };
- struct nfs_renameres res;
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
- .rpc_argp = &arg,
- .rpc_resp = &res,
- };
- int status = -ENOMEM;
-
- dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
-
- res.old_fattr = nfs_alloc_fattr();
- res.new_fattr = nfs_alloc_fattr();
- if (res.old_fattr == NULL || res.new_fattr == NULL)
- goto out;
-
- status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
- nfs_post_op_update_inode(old_dir, res.old_fattr);
- nfs_post_op_update_inode(new_dir, res.new_fattr);
-out:
- nfs_free_fattr(res.old_fattr);
- nfs_free_fattr(res.new_fattr);
- dprintk("NFS reply rename: %d\n", status);
- return status;
-}
-
-static int
nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs3_linkargs arg = {
@@ -572,18 +545,20 @@ out:
static int
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
dprintk("NFS call mkdir %pd\n", dentry);
- sattr->ia_mode &= ~current_umask();
-
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
+
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
data->arg.mkdir.fh = NFS_FH(dir);
data->arg.mkdir.name = dentry->d_name.name;
@@ -592,9 +567,13 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
- goto out;
+ goto out_release_acls;
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mkdir: %d\n", status);
@@ -691,19 +670,21 @@ static int
nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
dprintk("NFS call mknod %pd %u:%u\n", dentry,
MAJOR(rdev), MINOR(rdev));
- sattr->ia_mode &= ~current_umask();
-
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
+
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD];
data->arg.mknod.fh = NFS_FH(dir);
data->arg.mknod.name = dentry->d_name.name;
@@ -731,8 +712,13 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
- goto out;
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+ goto out_release_acls;
+
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mknod: %d\n", status);
@@ -809,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -821,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
return 0;
}
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -843,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
- return 0;
-}
-
static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
rpc_call_start(task);
@@ -904,20 +884,28 @@ static const struct inode_operations nfs3_dir_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .removexattr = generic_removexattr,
+ .get_acl = nfs3_get_acl,
+ .set_acl = nfs3_set_acl,
+#endif
};
static const struct inode_operations nfs3_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .removexattr = generic_removexattr,
+ .get_acl = nfs3_get_acl,
+ .set_acl = nfs3_set_acl,
+#endif
};
const struct nfs_rpc_ops nfs_v3_clientops = {
@@ -939,7 +927,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.unlink_setup = nfs3_proc_unlink_setup,
.unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
.unlink_done = nfs3_proc_unlink_done,
- .rename = nfs3_proc_rename,
.rename_setup = nfs3_proc_rename_setup,
.rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
.rename_done = nfs3_proc_rename_done,
@@ -953,19 +940,16 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.fsinfo = nfs3_proc_fsinfo,
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
+ .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
.read_setup = nfs3_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
- .clear_acl_cache = nfs3_forget_cached_acls,
+ .clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
.return_delegation = nfs3_return_delegation,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index cc471c72523..d6a98949af1 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -12,6 +12,9 @@ static struct nfs_subversion nfs_v3 = {
.rpc_vers = &nfs_version3,
.rpc_ops = &nfs_v3_clientops,
.sops = &nfs_sops,
+#ifdef CONFIG_NFS_V3_ACL
+ .xattr = nfs3_xattr_handlers,
+#endif
};
static int __init init_nfs_v3(void)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c1..8f4cbe7f4aa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
* };
*/
static void encode_read3args(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
* };
*/
static void encode_write3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_write3args(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -1589,7 +1589,7 @@ out_default:
* };
*/
static int decode_read3resok(struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
u32 eof, count, ocount, recvd;
__be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
}
static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -1673,7 +1673,7 @@ out_status:
* };
*/
static int decode_write3resok(struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
__be32 *p;
@@ -1697,7 +1697,7 @@ out_eio:
}
static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3ce79b04522..ba2affa5194 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,6 +9,14 @@
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
+#if defined(CONFIG_NFS_V4_2)
+#define NFS4_MAX_MINOR_VERSION 2
+#elif defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MINOR_VERSION 1
+#else
+#define NFS4_MAX_MINOR_VERSION 0
+#endif
+
#if IS_ENABLED(CONFIG_NFS_V4)
#define NFS4_MAX_LOOP_ON_RECOVER (10)
@@ -222,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
int nfs4_replace_transport(struct nfs_server *server,
@@ -262,6 +270,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
struct rpc_task *task);
+extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
@@ -328,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
*/
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_write_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
{
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -360,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_write_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
{
}
#endif /* CONFIG_NFS_V4_1 */
@@ -418,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs_inode_find_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
+extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
@@ -491,6 +501,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
return memcmp(dst, src, sizeof(*dst)) == 0;
}
+static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
+}
+
+static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
+{
+ return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
+}
+
static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
{
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index b4a160a405c..aa9ef487604 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -10,6 +10,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#include "internal.h"
#include "callback.h"
#include "delegation.h"
@@ -169,7 +170,7 @@ void nfs41_shutdown_client(struct nfs_client *clp)
void nfs40_shutdown_client(struct nfs_client *clp)
{
if (clp->cl_slot_tbl) {
- nfs4_release_slot_table(clp->cl_slot_tbl);
+ nfs4_shutdown_slot_table(clp->cl_slot_tbl);
kfree(clp->cl_slot_tbl);
}
}
@@ -370,6 +371,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
if (error == -EINVAL)
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
@@ -409,13 +411,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
- nfs_put_client(clp);
- if (clp != old) {
- clp->cl_preserve_clid = true;
- clp = old;
- }
- return clp;
+ if (clp != old)
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ return old;
error:
nfs_mark_client_ready(clp, error);
@@ -493,9 +493,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ goto out;
+ status = -NFS4ERR_STALE_CLIENTID;
+ spin_lock(&nn->nfs_client_lock);
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
@@ -530,6 +531,13 @@ int nfs40_walk_client_list(struct nfs_client *new,
*result = pos;
dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
__func__, pos, atomic_read(&pos->cl_count));
+ goto out;
+ case -ERESTARTSYS:
+ case -ETIMEDOUT:
+ /* The callback path may have been inadvertently
+ * changed. Schedule recovery!
+ */
+ nfs4_schedule_path_down_recovery(pos);
default:
goto out;
}
@@ -633,7 +641,8 @@ int nfs41_walk_client_list(struct nfs_client *new,
}
spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ break;
+ status = -NFS4ERR_STALE_CLIENTID;
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
@@ -1133,6 +1142,7 @@ static int nfs_probe_destination(struct nfs_server *server)
* @hostname: new end-point's hostname
* @sap: new end-point's socket address
* @salen: size of "sap"
+ * @net: net namespace
*
* The nfs_server must be quiescent before this function is invoked.
* Either its session is drained (NFSv4.1+), or its transport is
@@ -1141,13 +1151,13 @@ static int nfs_probe_destination(struct nfs_server *server)
* Returns zero on success, or a negative errno value.
*/
int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen)
+ struct sockaddr *sap, size_t salen, struct net *net)
{
struct nfs_client *clp = server->nfs_client;
struct rpc_clnt *clnt = server->client;
struct xprt_create xargs = {
.ident = clp->cl_proto,
- .net = &init_net,
+ .net = net,
.dstaddr = sap,
.addrlen = salen,
.servername = hostname,
@@ -1187,7 +1197,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
error = nfs4_set_client(server, hostname, sap, salen, buf,
clp->cl_rpcclient->cl_auth->au_flavor,
clp->cl_proto, clnt->cl_timeout,
- clp->cl_minorversion, clp->cl_net);
+ clp->cl_minorversion, net);
nfs_put_client(clp);
if (error != 0) {
nfs_server_insert_lists(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407e036..a816f0627a6 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
break;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- if (!ret && !datasync)
- /* application has asked for meta-data sync */
+ if (!ret)
ret = pnfs_layoutcommit_inode(inode, true);
mutex_unlock(&inode->i_mutex);
/*
@@ -118,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct file_operations nfs4_file_operations = {
.llseek = nfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = nfs_file_read,
+ .write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs4_file_open,
.flush = nfs_file_flush,
@@ -130,7 +129,7 @@ const struct file_operations nfs4_file_operations = {
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
- .splice_write = nfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
};
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 4e7f05d3e9d..3d83cb1fdc7 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -121,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry,
}
static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct nfs_server *server)
+ struct sockaddr *sa, size_t salen, struct net *net)
{
- struct net *net = rpc_net_ns(server->client);
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
@@ -140,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len,
* @server: NFS server struct
* @flavors: List of security tuples returned by SECINFO procedure
*
- * Return the pseudoflavor of the first security mechanism in
- * "flavors" that is locally supported. Return RPC_AUTH_UNIX if
- * no matching flavor is found in the array. The "flavors" array
+ * Return an rpc client that uses the first security mechanism in
+ * "flavors" that is locally supported. The "flavors" array
* is searched in the order returned from the server, per RFC 3530
- * recommendation.
+ * recommendation and each flavor is checked for membership in the
+ * sec= mount option list if it exists.
+ *
+ * Return -EPERM if no matching flavor is found in the array.
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ *
*/
-static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
+static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
+ struct nfs_server *server,
struct nfs4_secinfo_flavors *flavors)
{
- rpc_authflavor_t pseudoflavor;
+ rpc_authflavor_t pflavor;
struct nfs4_secinfo4 *secinfo;
unsigned int i;
@@ -160,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
case RPC_AUTH_NULL:
case RPC_AUTH_UNIX:
case RPC_AUTH_GSS:
- pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+ pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
&secinfo->flavor_info);
- /* make sure pseudoflavor matches sec= mount opt */
- if (pseudoflavor != RPC_AUTH_MAXFLAVOR &&
- nfs_auth_info_match(&server->auth_info,
- pseudoflavor))
- return pseudoflavor;
- break;
+ /* does the pseudoflavor match a sec= mount opt? */
+ if (pflavor != RPC_AUTH_MAXFLAVOR &&
+ nfs_auth_info_match(&server->auth_info, pflavor)) {
+ struct rpc_clnt *new;
+ struct rpc_cred *cred;
+
+ /* Cloning creates an rpc_auth for the flavor */
+ new = rpc_clone_client_set_auth(clnt, pflavor);
+ if (IS_ERR(new))
+ continue;
+ /**
+ * Check that the user actually can use the
+ * flavor. This is mostly for RPC_AUTH_GSS
+ * where cr_init obtains a gss context
+ */
+ cred = rpcauth_lookupcred(new->cl_auth, 0);
+ if (IS_ERR(cred)) {
+ rpc_shutdown_client(new);
+ continue;
+ }
+ put_rpccred(cred);
+ return new;
+ }
}
}
-
- /* if there were any sec= options then nothing matched */
- if (server->auth_info.flavor_len > 0)
- return -EPERM;
-
- return RPC_AUTH_UNIX;
+ return ERR_PTR(-EPERM);
}
-static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+/**
+ * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
+ * return an rpc_clnt that uses the best available security flavor with
+ * respect to the secinfo flavor list and the sec= mount options.
+ *
+ * @clnt: RPC client to clone
+ * @inode: directory inode
+ * @name: lookup name
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ */
+struct rpc_clnt *
+nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
+ struct qstr *name)
{
struct page *page;
struct nfs4_secinfo_flavors *flavors;
- rpc_authflavor_t flavor;
+ struct rpc_clnt *new;
int err;
page = alloc_page(GFP_KERNEL);
if (!page)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+
flavors = page_address(page);
err = nfs4_proc_secinfo(inode, name, flavors);
if (err < 0) {
- flavor = err;
+ new = ERR_PTR(err);
goto out;
}
- flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors);
+ new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
out:
put_page(page);
- return flavor;
-}
-
-/*
- * Please call rpc_shutdown_client() when you are done with this client.
- */
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
- struct qstr *name)
-{
- rpc_authflavor_t flavor;
-
- flavor = nfs4_negotiate_security(inode, name);
- if ((int)flavor < 0)
- return ERR_PTR((int)flavor);
-
- return rpc_clone_client_set_auth(clnt, flavor);
+ return new;
}
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -223,6 +239,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
const struct nfs4_fs_location *location)
{
const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
struct vfsmount *mnt = ERR_PTR(-ENOENT);
char *mnt_path;
unsigned int maxbuflen;
@@ -248,8 +265,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
continue;
mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
- mountdata->addr, addr_bufsize,
- NFS_SB(mountdata->sb));
+ mountdata->addr, addr_bufsize, net);
if (mountdata->addrlen == 0)
continue;
@@ -398,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
if (client->cl_auth->au_flavor != flavor)
flavor = client->cl_auth->au_flavor;
- else {
- rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
- if ((int)new >= 0)
- flavor = new;
- }
mnt = nfs_do_submount(dentry, fh, fattr, flavor);
out:
rpc_shutdown_client(client);
@@ -419,6 +430,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
const struct nfs4_fs_location *location)
{
const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ struct net *net = rpc_net_ns(server->client);
struct sockaddr *sap;
unsigned int s;
size_t salen;
@@ -440,7 +452,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, server);
+ sap, addr_bufsize, net);
if (salen == 0)
continue;
rpc_set_port(sap, NFS_PORT);
@@ -450,7 +462,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
if (hostname == NULL)
break;
- error = nfs4_update_server(server, hostname, sap, salen);
+ error = nfs4_update_server(server, hostname, sap, salen, net);
kfree(hostname);
if (error == 0)
break;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 659990c0109..4bf3d97cc5a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -539,7 +539,7 @@ static int nfs40_sequence_done(struct rpc_task *task,
struct nfs4_slot *slot = res->sr_slot;
struct nfs4_slot_table *tbl;
- if (!RPC_WAS_SENT(task))
+ if (slot == NULL)
goto out;
tbl = slot->table;
@@ -559,15 +559,10 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
+ struct nfs4_slot *slot = res->sr_slot;
bool send_new_highest_used_slotid = false;
- if (!res->sr_slot) {
- /* just wake up the next guy waiting since
- * we may have not consumed a slot after all */
- dprintk("%s: No slot\n", __func__);
- return;
- }
- tbl = res->sr_slot->table;
+ tbl = slot->table;
session = tbl->session;
spin_lock(&tbl->slot_tbl_lock);
@@ -577,11 +572,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
if (tbl->highest_used_slotid > tbl->target_highest_slotid)
send_new_highest_used_slotid = true;
- if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) {
+ if (nfs41_wake_and_assign_slot(tbl, slot)) {
send_new_highest_used_slotid = false;
goto out_unlock;
}
- nfs4_free_slot(tbl, res->sr_slot);
+ nfs4_free_slot(tbl, slot);
if (tbl->highest_used_slotid != NFS4_NO_SLOT)
send_new_highest_used_slotid = false;
@@ -592,19 +587,20 @@ out_unlock:
nfs41_server_notify_highest_slotid_update(session->clp);
}
-static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
- struct nfs4_slot *slot;
+ struct nfs4_slot *slot = res->sr_slot;
struct nfs_client *clp;
bool interrupted = false;
int ret = 1;
+ if (slot == NULL)
+ goto out_noaction;
/* don't increment the sequence number if the task wasn't sent */
if (!RPC_WAS_SENT(task))
goto out;
- slot = res->sr_slot;
session = slot->table->session;
if (slot->interrupted) {
@@ -679,6 +675,7 @@ out:
/* The session may be reset by one of the error handlers. */
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
nfs41_sequence_free_slot(res);
+out_noaction:
return ret;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
@@ -692,6 +689,7 @@ out_retry:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return 0;
}
+EXPORT_SYMBOL_GPL(nfs41_sequence_done);
static int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
@@ -1070,6 +1068,7 @@ static void nfs4_opendata_free(struct kref *kref)
dput(p->dentry);
nfs_sb_deactive(sb);
nfs_fattr_free_names(&p->f_attr);
+ kfree(p->f_attr.mdsthreshold);
kfree(p);
}
@@ -1139,12 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
nfs4_state_set_mode_locked(state, state->state | fmode);
}
-static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
+ struct nfs_client *clp = state->owner->so_server->nfs_client;
+ bool need_recover = false;
+
+ if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
+ need_recover = true;
+ if (need_recover)
+ nfs4_state_mark_reclaim_nograce(clp, state);
+}
+
+static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+ nfs4_stateid *stateid)
+{
+ if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ return true;
+ if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ nfs_test_and_clear_all_open_stateid(state);
+ return true;
+ }
+ if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
+ return true;
+ return false;
+}
+
+static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
+ nfs4_stateid *stateid, fmode_t fmode)
+{
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ switch (fmode & (FMODE_READ|FMODE_WRITE)) {
+ case FMODE_WRITE:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ:
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case 0:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
+ }
+ if (stateid == NULL)
+ return;
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
- set_bit(NFS_OPEN_STATE, &state->flags);
+}
+
+static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+{
+ write_seqlock(&state->seqlock);
+ nfs_clear_open_stateid_locked(state, stateid, fmode);
+ write_sequnlock(&state->seqlock);
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
+}
+
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+{
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -1155,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
case FMODE_READ|FMODE_WRITE:
set_bit(NFS_O_RDWR_STATE, &state->flags);
}
-}
-
-static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
-{
- write_seqlock(&state->seqlock);
- nfs_set_open_stateid_locked(state, stateid, fmode);
- write_sequnlock(&state->seqlock);
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
}
static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
@@ -1219,6 +1275,8 @@ no_delegation:
__update_open_stateid(state, open_stateid, NULL, fmode);
ret = 1;
}
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
return ret;
}
@@ -1452,12 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
struct nfs4_state *newstate;
int ret;
+ /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
/* memory barrier prior to reading state->n_* */
clear_bit(NFS_DELEGATED_STATE, &state->flags);
clear_bit(NFS_OPEN_STATE, &state->flags);
smp_rmb();
if (state->n_rdwr != 0) {
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1465,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_wronly != 0) {
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1473,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_rdonly != 0) {
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
if (ret != 0)
return ret;
@@ -1622,15 +1681,15 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
- nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args,
- &data->o_res.seq_res, task);
+ nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args,
+ &data->c_res.seq_res, task);
}
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
- nfs40_sequence_done(task, &data->o_res.seq_res);
+ nfs40_sequence_done(task, &data->c_res.seq_res);
data->rpc_status = task->tk_status;
if (data->rpc_status == 0) {
@@ -1688,7 +1747,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
};
int status;
- nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1);
+ nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
@@ -1968,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return status;
}
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
- _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+ nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
return 0;
}
@@ -2246,10 +2305,12 @@ static int _nfs4_do_open(struct inode *dir,
}
}
- if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
- opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
- if (!opendata->f_attr.mdsthreshold)
- goto err_free_label;
+ if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ if (!opendata->f_attr.mdsthreshold) {
+ opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+ if (!opendata->f_attr.mdsthreshold)
+ goto err_free_label;
+ }
opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
}
if (dentry->d_inode != NULL)
@@ -2277,11 +2338,10 @@ static int _nfs4_do_open(struct inode *dir,
if (opendata->file_created)
*opened |= FILE_CREATED;
- if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+ if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
*ctx_th = opendata->f_attr.mdsthreshold;
- else
- kfree(opendata->f_attr.mdsthreshold);
- opendata->f_attr.mdsthreshold = NULL;
+ opendata->f_attr.mdsthreshold = NULL;
+ }
nfs4_label_free(olabel);
@@ -2291,7 +2351,6 @@ static int _nfs4_do_open(struct inode *dir,
err_free_label:
nfs4_label_free(olabel);
err_opendata_put:
- kfree(opendata->f_attr.mdsthreshold);
nfs4_opendata_put(opendata);
err_put_state_owner:
nfs4_put_state_owner(sp);
@@ -2400,13 +2459,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) {
/* Use that stateid */
- } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) {
+ } else if (truncate && state != NULL) {
struct nfs_lockowner lockowner = {
.l_owner = current->files,
.l_pid = current->tgid,
};
- nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
- &lockowner);
+ if (!nfs4_valid_open_stateid(state))
+ return -EBADF;
+ if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
+ &lockowner) == -EIO)
+ return -EBADF;
} else
nfs4_stateid_copy(&arg.stateid, &zero_stateid);
@@ -2478,26 +2540,6 @@ static void nfs4_free_closedata(void *data)
kfree(calldata);
}
-static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
- fmode_t fmode)
-{
- spin_lock(&state->owner->so_lock);
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- switch (fmode & (FMODE_READ|FMODE_WRITE)) {
- case FMODE_WRITE:
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- break;
- case FMODE_READ:
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- break;
- case 0:
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_OPEN_STATE, &state->flags);
- }
- spin_unlock(&state->owner->so_lock);
-}
-
static void nfs4_close_done(struct rpc_task *task, void *data)
{
struct nfs4_closedata *calldata = data;
@@ -2516,11 +2558,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->roc)
pnfs_roc_set_barrier(state->inode,
calldata->roc_barrier);
- nfs_set_open_stateid(state, &calldata->res.stateid, 0);
+ nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- nfs4_close_clear_stateid_flags(state,
- calldata->arg.fmode);
- break;
+ goto out_release;
+ case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -2528,9 +2569,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
rpc_restart_call_prepare(task);
+ goto out_release;
+ }
}
+ nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
+out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
@@ -2705,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
@@ -2741,7 +2786,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
NFS_CAP_CTIME|NFS_CAP_MTIME|
NFS_CAP_SECURITY_LABEL);
- if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
+ if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
+ res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
server->caps |= NFS_CAP_ACLS;
if (res.has_links != 0)
server->caps |= NFS_CAP_HARDLINKS;
@@ -3201,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
err = -EPERM;
if (client != *clnt)
goto out;
- client = nfs4_create_sec_client(client, dir, name);
+ client = nfs4_negotiate_security(client, dir, name);
if (IS_ERR(client))
return PTR_ERR(client);
@@ -3502,49 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 1;
}
-static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_server *server = NFS_SERVER(old_dir);
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .new_dir = NFS_FH(new_dir),
- .old_name = old_name,
- .new_name = new_name,
- };
- struct nfs_renameres res = {
- .server = server,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
- .rpc_argp = &arg,
- .rpc_resp = &res,
- };
- int status = -ENOMEM;
-
- status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
- if (!status) {
- update_changeattr(old_dir, &res.old_cinfo);
- update_changeattr(new_dir, &res.new_cinfo);
- }
- return status;
-}
-
-static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs4_exception exception = { };
- int err;
- do {
- err = _nfs4_proc_rename(old_dir, old_name,
- new_dir, new_name);
- trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err);
- err = nfs4_handle_exception(NFS_SERVER(old_dir), err,
- &exception);
- } while (exception.retry);
- return err;
-}
-
static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_server *server = NFS_SERVER(inode);
@@ -4009,8 +4012,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
{
nfs4_stateid current_stateid;
- if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
- return false;
+ /* If the current stateid represents a lost lock, then exit */
+ if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+ return true;
return nfs4_stateid_match(stateid, &current_stateid);
}
@@ -4029,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
return false;
}
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
{
nfs_invalidate_atime(data->header->inode);
}
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
@@ -4051,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
}
static bool nfs4_read_stateid_changed(struct rpc_task *task,
- struct nfs_readargs *args)
+ struct nfs_pgio_args *args)
{
if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4064,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
dprintk("--> %s\n", __func__);
@@ -4073,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &data->args))
return -EAGAIN;
- return data->read_done_cb ? data->read_done_cb(task, data) :
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_read_done_cb(task, data);
}
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
data->timestamp = jiffies;
- data->read_done_cb = nfs4_read_done_cb;
+ data->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
}
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
@@ -4093,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
task))
return 0;
if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, FMODE_READ) == -EIO)
+ data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
return -EIO;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
return -EIO;
return 0;
}
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -4117,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
}
static bool nfs4_write_stateid_changed(struct rpc_task *task,
- struct nfs_writeargs *args)
+ struct nfs_pgio_args *args)
{
if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4130,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
if (nfs4_write_stateid_changed(task, &data->args))
return -EAGAIN;
- return data->write_done_cb ? data->write_done_cb(task, data) :
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_write_done_cb(task, data);
}
static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
{
const struct nfs_pgio_header *hdr = data->header;
@@ -4154,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
@@ -4164,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
} else
data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_write_done_cb;
+ if (!data->pgio_done_cb)
+ data->pgio_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
@@ -4173,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
- &data->args.seq_args,
- &data->res.seq_res,
- task))
- return 0;
- if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, FMODE_WRITE) == -EIO)
- return -EIO;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
- return -EIO;
- return 0;
-}
-
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -4318,9 +4307,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
static inline int nfs4_server_supports_acls(struct nfs_server *server)
{
- return (server->caps & NFS_CAP_ACLS)
- && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
- && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
+ return server->caps & NFS_CAP_ACLS;
}
/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
@@ -4802,7 +4789,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- goto restart_call;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4880,6 +4867,20 @@ nfs4_init_uniform_client_string(const struct nfs_client *clp,
nodename);
}
+/*
+ * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback
+ * services. Advertise one based on the address family of the
+ * clientaddr.
+ */
+static unsigned int
+nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
+{
+ if (strchr(clp->cl_ipaddr, ':') != NULL)
+ return scnprintf(buf, len, "tcp6");
+ else
+ return scnprintf(buf, len, "tcp");
+}
+
/**
* nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure
@@ -4921,12 +4922,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
setclientid.sc_name,
sizeof(setclientid.sc_name));
/* cb_client4 */
- rcu_read_lock();
- setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
- sizeof(setclientid.sc_netid), "%s",
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_NETID));
- rcu_read_unlock();
+ setclientid.sc_netid_len =
+ nfs4_init_callback_netid(clp,
+ setclientid.sc_netid,
+ sizeof(setclientid.sc_netid));
setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
@@ -4987,11 +4986,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -5822,8 +5827,7 @@ struct nfs_release_lockowner_data {
struct nfs4_lock_state *lsp;
struct nfs_server *server;
struct nfs_release_lockowner_args args;
- struct nfs4_sequence_args seq_args;
- struct nfs4_sequence_res seq_res;
+ struct nfs_release_lockowner_res res;
unsigned long timestamp;
};
@@ -5831,7 +5835,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata
{
struct nfs_release_lockowner_data *data = calldata;
nfs40_setup_sequence(data->server,
- &data->seq_args, &data->seq_res, task);
+ &data->args.seq_args, &data->res.seq_res, task);
data->timestamp = jiffies;
}
@@ -5840,7 +5844,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
struct nfs_release_lockowner_data *data = calldata;
struct nfs_server *server = data->server;
- nfs40_sequence_done(task, &data->seq_res);
+ nfs40_sequence_done(task, &data->res.seq_res);
switch (task->tk_status) {
case 0:
@@ -5881,7 +5885,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
return -ENOMEM;
- nfs4_init_sequence(&data->seq_args, &data->seq_res, 0);
data->lsp = lsp;
data->server = server;
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5889,6 +5892,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
data->args.lock_owner.s_dev = server->s_dev;
msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
return 0;
}
@@ -7400,9 +7405,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
- unsigned long timeo, giveup;
+ unsigned long timeo, now, giveup;
- dprintk("--> %s\n", __func__);
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
@@ -7410,12 +7415,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+ /*
+ * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+ * (or clients) writing to the same RAID stripe
+ */
case -NFS4ERR_LAYOUTTRYLATER:
+ /*
+ * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
+ * existing layout before getting a new one).
+ */
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
- if (time_after(giveup, jiffies))
- task->tk_status = -NFS4ERR_DELAY;
+ now = jiffies;
+ if (time_after(giveup, now)) {
+ unsigned long delay;
+
+ /* Delay for:
+ * - Not less then NFS4_POLL_RETRY_MIN.
+ * - One last time a jiffie before we give up
+ * - exponential backoff (time_now minus start_attempt)
+ */
+ delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+ min((giveup - now - 1),
+ now - lgp->args.timestamp));
+
+ dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+ __func__, delay);
+ rpc_delay(task, delay);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out; /* Do not call nfs4_async_handle_error() */
+ }
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
@@ -7589,7 +7620,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
server = NFS_SERVER(lrp->args.inode);
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ switch (task->tk_status) {
+ default:
+ task->tk_status = 0;
+ case 0:
+ break;
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+ break;
rpc_restart_call_prepare(task);
return;
}
@@ -7764,10 +7802,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_BADLAYOUT: /* no layout */
case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
- break;
case 0:
- nfs_post_op_update_inode_force_wcc(data->args.inode,
- data->res.fattr);
break;
default:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
@@ -7782,6 +7817,8 @@ static void nfs4_layoutcommit_release(void *calldata)
struct nfs4_layoutcommit_data *data = calldata;
pnfs_cleanup_layoutcommit(data);
+ nfs_post_op_update_inode_force_wcc(data->args.inode,
+ data->res.fattr);
put_rpccred(data->cred);
kfree(data);
}
@@ -7904,7 +7941,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
@@ -7938,7 +7975,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
- if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
@@ -8366,7 +8403,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.unlink_setup = nfs4_proc_unlink_setup,
.unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
.unlink_done = nfs4_proc_unlink_done,
- .rename = nfs4_proc_rename,
.rename_setup = nfs4_proc_rename_setup,
.rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
.rename_done = nfs4_proc_rename_done,
@@ -8381,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
+ .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
.read_setup = nfs4_proc_read_setup,
- .read_pageio_init = pnfs_pageio_init_read,
- .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
.read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
- .write_pageio_init = pnfs_pageio_init_write,
- .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index cf883c7ae05..e799dc3c3b1 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -231,14 +231,23 @@ out:
return ret;
}
+/*
+ * nfs4_release_slot_table - release all slot table entries
+ */
+static void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+{
+ nfs4_shrink_slot_table(tbl, 0);
+}
+
/**
- * nfs4_release_slot_table - release resources attached to a slot table
+ * nfs4_shutdown_slot_table - release resources attached to a slot table
* @tbl: slot table to shut down
*
*/
-void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl)
{
- nfs4_shrink_slot_table(tbl, 0);
+ nfs4_release_slot_table(tbl);
+ rpc_destroy_wait_queue(&tbl->slot_tbl_waitq);
}
/**
@@ -422,7 +431,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
spin_unlock(&tbl->slot_tbl_lock);
}
-static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+static void nfs4_release_session_slot_tables(struct nfs4_session *session)
{
nfs4_release_slot_table(&session->fc_slot_table);
nfs4_release_slot_table(&session->bc_slot_table);
@@ -450,7 +459,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
if (status && tbl->slots == NULL)
/* Fore and back channel share a connection so get
* both slot tables or neither */
- nfs4_destroy_session_slot_tables(ses);
+ nfs4_release_session_slot_tables(ses);
return status;
}
@@ -470,6 +479,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
return session;
}
+static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+{
+ nfs4_shutdown_slot_table(&session->fc_slot_table);
+ nfs4_shutdown_slot_table(&session->bc_slot_table);
+}
+
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 23230610065..b34ada9bc6a 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -74,7 +74,7 @@ enum nfs4_session_state {
extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
unsigned int max_reqs, const char *queue);
-extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl);
+extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 059c01b67a7..848f6853c59 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
nfs4_stateid_copy(dst, &lsp->ls_stateid);
ret = 0;
- smp_rmb();
- if (!list_empty(&lsp->ls_seqid.list))
- ret = -EWOULDBLOCK;
}
spin_unlock(&state->state_lock);
nfs4_put_lock_state(lsp);
@@ -984,10 +981,9 @@ out:
return ret;
}
-static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
+static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
{
const nfs4_stateid *src;
- int ret;
int seq;
do {
@@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
if (test_bit(NFS_OPEN_STATE, &state->flags))
src = &state->open_stateid;
nfs4_stateid_copy(dst, src);
- ret = 0;
- smp_rmb();
- if (!list_empty(&state->owner->so_seqid.list))
- ret = -EWOULDBLOCK;
} while (read_seqretry(&state->seqlock, seq));
- return ret;
}
/*
@@ -1015,15 +1006,19 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
if (ret == -EIO)
/* A lost lock - don't even consider delegations */
goto out;
- if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
+ /* returns true if delegation stateid found and copied */
+ if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) {
+ ret = 0;
goto out;
+ }
if (ret != -ENOENT)
/* nfs4_copy_delegation_stateid() didn't over-write
* dst, so it still has the lock stateid which we now
* choose to use.
*/
goto out;
- ret = nfs4_copy_open_stateid(dst, state);
+ nfs4_copy_open_stateid(dst, state);
+ ret = 0;
out:
if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
dst->seqid = 0;
@@ -1071,7 +1066,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
/*
* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
* failed with a seqid incrementing error -
- * see comments nfs_fs.h:seqid_mutating_error()
+ * see comments nfs4.h:seqid_mutating_error()
*/
static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
{
@@ -1116,7 +1111,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
/*
* Increment the seqid if the LOCK/LOCKU succeeded, or
* failed with a seqid incrementing error -
- * see comments nfs_fs.h:seqid_mutating_error()
+ * see comments nfs4.h:seqid_mutating_error()
*/
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
@@ -1145,9 +1140,9 @@ static int nfs4_run_state_manager(void *);
static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
rpc_wake_up(&clp->cl_rpcwaitq);
}
@@ -1321,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st
return 1;
}
-static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1461,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
* server that doesn't support a grace period.
*/
spin_lock(&sp->so_lock);
- write_seqcount_begin(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1524,13 +1519,13 @@ restart:
spin_lock(&sp->so_lock);
goto restart;
}
- write_seqcount_end(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return 0;
out_err:
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
- write_seqcount_end(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return status;
}
@@ -2080,8 +2075,10 @@ again:
switch (status) {
case 0:
break;
- case -NFS4ERR_DELAY:
case -ETIMEDOUT:
+ if (clnt->cl_softrtry)
+ break;
+ case -NFS4ERR_DELAY:
case -EAGAIN:
ssleep(1);
case -NFS4ERR_STALE_CLIENTID:
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 65ab0a0ca1c..6f340f02f2b 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -77,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
- if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
- int status;
- bool sync = true;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- sync = false;
-
- status = pnfs_layoutcommit_inode(inode, sync);
- if (status < 0)
- return status;
- }
+ if (ret == 0)
+ ret = pnfs_layoutcommit_inode(inode,
+ wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
@@ -98,7 +90,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
*/
static void nfs4_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 2628d921b7e..b6ebe7e445f 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static ctl_table nfs4_cb_sysctls[] = {
+static struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
@@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_dir[] = {
+static struct ctl_table nfs4_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_root[] = {
+static struct ctl_table nfs4_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db3..0a744f3a86f 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO(
- const struct nfs_read_data *data,
+ const struct nfs_pgio_data *data,
int error
),
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
#define DEFINE_NFS4_READ_EVENT(name) \
DEFINE_EVENT(nfs4_read_event, name, \
TP_PROTO( \
- const struct nfs_read_data *data, \
+ const struct nfs_pgio_data *data, \
int error \
), \
TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
DECLARE_EVENT_CLASS(nfs4_write_event,
TP_PROTO(
- const struct nfs_write_data *data,
+ const struct nfs_pgio_data *data,
int error
),
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
#define DEFINE_NFS4_WRITE_EVENT(name) \
DEFINE_EVENT(nfs4_write_event, name, \
TP_PROTO( \
- const struct nfs_write_data *data, \
+ const struct nfs_pgio_data *data, \
int error \
), \
TP_ARGS(data, error))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5be2868c02f..939ae606cfa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -203,8 +203,7 @@ static int nfs4_stat_to_errno(int);
2 + encode_verifier_maxsz + 5 + \
nfs4_label_maxsz)
#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
- decode_verifier_maxsz + \
- nfs4_label_maxsz + nfs4_fattr_maxsz)
+ decode_verifier_maxsz)
#define encode_readlink_maxsz (op_encode_hdr_maxsz)
#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
#define encode_write_maxsz (op_encode_hdr_maxsz + \
@@ -1557,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
}
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
@@ -1702,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
encode_nfs4_verifier(xdr, &arg->confirm);
}
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
@@ -2452,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a READ request
*/
static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2514,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a WRITE request
*/
static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -3097,7 +3098,8 @@ out_overflow:
return -EIO;
}
-static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
@@ -3107,19 +3109,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
- if (opnum != expected) {
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
- return -EIO;
- }
+ if (unlikely(opnum != expected))
+ goto out_bad_operation;
nfserr = be32_to_cpup(p);
- if (nfserr != NFS_OK)
- return nfs4_stat_to_errno(nfserr);
- return 0;
+ if (nfserr == NFS_OK)
+ *nfs_retval = 0;
+ else
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
+ return true;
+out_bad_operation:
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
+ opnum, expected);
+ *nfs_retval = -EREMOTEIO;
+ return false;
out_overflow:
print_overflow_msg(__func__, xdr);
- return -EIO;
+ *nfs_retval = -EIO;
+ return false;
+}
+
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+ int retval;
+
+ __decode_op_hdr(xdr, expected, &retval);
+ return retval;
}
/* Dummy routine */
@@ -3435,7 +3450,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
{
__be32 *p;
- *res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
+ *res = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
@@ -5001,11 +5016,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
- status = decode_op_hdr(xdr, OP_OPEN);
- if (status != -EIO)
- nfs_increment_open_seqid(status, res->seqid);
- if (!status)
- status = decode_stateid(xdr, &res->stateid);
+ if (!__decode_op_hdr(xdr, OP_OPEN, &status))
+ return status;
+ nfs_increment_open_seqid(status, res->seqid);
+ if (status)
+ return status;
+ status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
@@ -5071,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_PUTROOTFH);
}
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+ struct nfs_pgio_res *res)
{
__be32 *p;
uint32_t count, eof, recvd;
@@ -5325,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
}
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
int status;
@@ -6622,7 +6639,7 @@ out:
* Decode Read response
*/
static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_readres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
@@ -6647,7 +6664,7 @@ out:
* Decode WRITE response
*/
static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 89fe741e58b..59f838cdc00 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -36,6 +36,7 @@
__print_flags(v, "|", \
{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
{ 1 << NFS_INO_STALE, "STALE" }, \
+ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
{ 1 << NFS_INO_COMMIT, "COMMIT" }, \
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f..611320753db 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync);
}
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
- struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct nfs_pgio_data *wdata = objios->oir.rpcdata;
struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page,
};
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
{
struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
return 0;
}
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req)
{
- if (!pnfs_generic_pg_test(pgio, prev, req))
- return false;
+ unsigned int size;
+
+ size = pnfs_generic_pg_test(pgio, prev, req);
+
+ if (!size || pgio->pg_count + req->wb_bytes >
+ (unsigned long)pgio->pg_layout_private)
+ return 0;
- return pgio->pg_count + req->wb_bytes <=
- (unsigned long)pgio->pg_layout_private;
+ return min(size, req->wb_bytes);
}
static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67..765d3f54e98 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct objlayout *objlay;
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
- if (objlay) {
- spin_lock_init(&objlay->lock);
- INIT_LIST_HEAD(&objlay->err_list);
- }
+ if (!objlay)
+ return NULL;
+ spin_lock_init(&objlay->lock);
+ INIT_LIST_HEAD(&objlay->err_list);
dprintk("%s: Return %p\n", __func__, objlay);
return &objlay->pnfs_layout;
}
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_read_data *rdata = oir->rpcdata;
+ struct nfs_pgio_data *rdata = oir->rpcdata;
oir->status = rdata->task.tk_status = status;
if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads.
*/
enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_write_done(wdata);
}
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_write_data *wdata = oir->rpcdata;
+ struct nfs_pgio_data *wdata = oir->rpcdata;
oir->status = wdata->task.tk_status = status;
if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes.
*/
enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
int how)
{
struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec612..01e041029a6 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
*/
extern void objio_free_result(struct objlayout_io_res *oir);
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
/*
* callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_read_data *);
+ struct nfs_pgio_data *);
extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_write_data *,
+ struct nfs_pgio_data *,
int how);
extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2ffebf2081c..17fab89f635 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,12 @@
#include "internal.h"
#include "pnfs.h"
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
static struct kmem_cache *nfs_page_cachep;
+static const struct rpc_call_ops nfs_pgio_common_ops;
-bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -95,7 +98,7 @@ nfs_iocounter_dec(struct nfs_io_counter *c)
{
if (atomic_dec_and_test(&c->io_count)) {
clear_bit(NFS_IO_INPROGRESS, &c->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
}
}
@@ -133,11 +136,168 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
return __nfs_iocounter_wait(c);
}
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ smp_mb__before_atomic();
+ clear_bit(PG_HEADLOCK, &head->wb_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+ struct nfs_page *head = req->wb_head;
+ struct nfs_page *tmp;
+
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+ WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+ tmp = req->wb_this_page;
+ while (tmp != req) {
+ if (!test_bit(bit, &tmp->wb_flags))
+ return false;
+ tmp = tmp->wb_this_page;
+ }
+
+ /* true! reset all bits */
+ tmp = req;
+ do {
+ clear_bit(bit, &tmp->wb_flags);
+ tmp = tmp->wb_this_page;
+ } while (tmp != req);
+
+ return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ * return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+ bool ret;
+
+ nfs_page_group_lock(req);
+ ret = nfs_page_group_sync_on_bit_locked(req, bit);
+ nfs_page_group_unlock(req);
+
+ return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ * or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+ WARN_ON_ONCE(prev == req);
+
+ if (!prev) {
+ /* a head request */
+ req->wb_head = req;
+ req->wb_this_page = req;
+ } else {
+ /* a subrequest */
+ WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+ req->wb_head = prev->wb_head;
+ req->wb_this_page = prev->wb_this_page;
+ prev->wb_this_page = req;
+
+ /* All subrequests take a ref on the head request until
+ * nfs_page_group_destroy is called */
+ kref_get(&req->wb_head->wb_kref);
+
+ /* grab extra ref if head request has extra ref from
+ * the write/commit path to handle handoff between write
+ * and commit lists */
+ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+ set_bit(PG_INODE_REF, &req->wb_flags);
+ kref_get(&req->wb_kref);
+ }
+ }
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @req - request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ struct nfs_page *tmp, *next;
+
+ /* subrequests must release the ref on the head request */
+ if (req->wb_head != req)
+ nfs_release_request(req->wb_head);
+
+ if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+ return;
+
+ tmp = req;
+ do {
+ next = tmp->wb_this_page;
+ /* unlink and free */
+ tmp->wb_this_page = tmp;
+ tmp->wb_head = tmp;
+ nfs_free_request(tmp);
+ tmp = next;
+ } while (tmp != req);
+}
+
/**
* nfs_create_request - Create an NFS read/write request.
* @ctx: open context to use
- * @inode: inode to which the request is attached
* @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
* @offset: starting offset within the page for the write
* @count: number of bytes to read/write
*
@@ -146,9 +306,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page,
- unsigned int offset, unsigned int count)
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+ struct nfs_page *last, unsigned int offset,
+ unsigned int count)
{
struct nfs_page *req;
struct nfs_lock_context *l_ctx;
@@ -180,6 +340,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
+ nfs_page_group_init(req, last);
return req;
}
@@ -193,9 +354,9 @@ void nfs_unlock_request(struct nfs_page *req)
printk(KERN_ERR "NFS: Invalid unlock attempted\n");
BUG();
}
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(PG_BUSY, &req->wb_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&req->wb_flags, PG_BUSY);
}
@@ -237,16 +398,22 @@ static void nfs_clear_request(struct nfs_page *req)
}
}
-
/**
* nfs_release_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
*/
-static void nfs_free_request(struct kref *kref)
+void nfs_free_request(struct nfs_page *req)
{
- struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
+ /* extra debug: make sure no sync bits are still set */
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
/* Release struct file and open context */
nfs_clear_request(req);
@@ -255,13 +422,7 @@ static void nfs_free_request(struct kref *kref)
void nfs_release_request(struct nfs_page *req)
{
- kref_put(&req->wb_kref, nfs_free_request);
-}
-
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
+ kref_put(&req->wb_kref, nfs_page_group_destroy);
}
/**
@@ -279,22 +440,249 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req can be coalesced into @desc, otherwise it returns
+ * the size of the request.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *prev, struct nfs_page *req)
{
- /*
- * FIXME: ideally we should be able to coalesce all requests
- * that are not block boundary aligned, but currently this
- * is problematic for the case of bsize < PAGE_CACHE_SIZE,
- * since nfs_flush_multi and nfs_pagein_multi assume you
- * can have only one struct nfs_page.
- */
- if (desc->pg_bsize < PAGE_SIZE)
+ if (desc->pg_count > desc->pg_bsize) {
+ /* should never happen */
+ WARN_ON_ONCE(1);
return 0;
+ }
- return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+ return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
+static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+{
+ return container_of(hdr, struct nfs_rw_header, header);
+}
+
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+ struct nfs_rw_header *header = ops->rw_alloc_header();
+
+ if (header) {
+ struct nfs_pgio_header *hdr = &header->header;
+
+ INIT_LIST_HEAD(&hdr->pages);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
+ hdr->rw_ops = ops;
+ }
+ return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+ hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
+/**
+ * nfs_pgio_data_alloc - Allocate pageio data
+ * @hdr: The header making a request
+ * @pagecount: Number of pages to create
+ */
+static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_pgio_data *data, *prealloc;
+
+ prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
+ }
+out:
+ return data;
+}
+
+/**
+ * nfs_pgio_data_release - Properly free pageio data
+ * @data: The data to release
+ */
+void nfs_pgio_data_release(struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
+
+ put_nfs_open_context(data->args.context);
+ if (data->pages.pagevec != data->pages.page_array)
+ kfree(data->pages.pagevec);
+ if (data == &pageio_header->rpc_data) {
+ data->header = NULL;
+ data = NULL;
+ }
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ /* Note: we only free the rpc_task after callbacks are done.
+ * See the comment in rpc_free_task() for why
+ */
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+
+/**
+ * nfs_pgio_rpcsetup - Set up arguments for a pageio call
+ * @data: The pageio data
+ * @count: Number of bytes to read
+ * @offset: Initial offset
+ * @how: How to commit data (writes only)
+ * @cinfo: Commit information for the call (writes only)
+ */
+static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+ unsigned int count, unsigned int offset,
+ int how, struct nfs_commit_info *cinfo)
+{
+ struct nfs_page *req = data->header->req;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ data->args.fh = NFS_FH(data->header->inode);
+ data->args.offset = req_offset(req) + offset;
+ /* pnfs_set_layoutcommit needs this */
+ data->mds_offset = data->args.offset;
+ data->args.pgbase = req->wb_pgbase + offset;
+ data->args.pages = data->pages.pagevec;
+ data->args.count = count;
+ data->args.context = get_nfs_open_context(req->wb_context);
+ data->args.lock_context = req->wb_lock_context;
+ data->args.stable = NFS_UNSTABLE;
+ switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+ case 0:
+ break;
+ case FLUSH_COND_STABLE:
+ if (nfs_reqs_to_commit(cinfo))
+ break;
+ default:
+ data->args.stable = NFS_FILE_SYNC;
+ }
+
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.eof = 0;
+ data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
+}
+
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ int err;
+ err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+ if (err)
+ rpc_exit(task, err);
+}
+
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+ const struct rpc_call_ops *call_ops, int how, int flags)
+{
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = data->header->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .task = &data->task,
+ .rpc_message = &msg,
+ .callback_ops = call_ops,
+ .callback_data = data,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | flags,
+ };
+ int ret = 0;
+
+ data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+ dprintk("NFS: %5u initiated pgio call "
+ "(req %s/%llu, %u bytes @ offset %llu)\n",
+ data->task.tk_pid,
+ data->header->inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(data->header->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+ if (how & FLUSH_SYNC) {
+ ret = rpc_wait_for_completion_task(task);
+ if (ret == 0)
+ ret = task->tk_status;
+ }
+ rpc_put_task(task);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
+/**
+ * nfs_pgio_error - Clean up from a pageio error
+ * @desc: IO descriptor
+ * @hdr: pageio header
+ */
+static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ nfs_pgio_data_release(hdr->data);
+ hdr->data = NULL;
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+static void nfs_pgio_release(void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ if (data->header->rw_ops->rw_release)
+ data->header->rw_ops->rw_release(data);
+ nfs_pgio_data_release(data);
+}
+
/**
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
@@ -307,6 +695,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
const struct nfs_pgio_completion_ops *compl_ops,
+ const struct nfs_rw_ops *rw_ops,
size_t bsize,
int io_flags)
{
@@ -320,6 +709,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
desc->pg_completion_ops = compl_ops;
+ desc->pg_rw_ops = rw_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
@@ -328,6 +718,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init);
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+static void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ struct inode *inode = data->header->inode;
+
+ dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+ task->tk_pid, task->tk_status);
+
+ if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+ return;
+ if (task->tk_status < 0)
+ nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+ else
+ data->header->rw_ops->rw_result(task, data);
+}
+
+/*
+ * Create an RPC task for the given read or write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_page *req;
+ struct page **pages;
+ struct nfs_pgio_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct nfs_commit_info cinfo;
+
+ data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data)
+ return nfs_pgio_error(desc, hdr);
+
+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+ pages = data->pages.pagevec;
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);
+ *pages++ = req->wb_page;
+ }
+
+ if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
+ desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+ /* Set up the argument struct */
+ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+ hdr->data = data;
+ desc->pg_rpc_callops = &nfs_pgio_common_ops;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pgio);
+
+static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
+{
+ struct nfs_rw_header *rw_hdr;
+ struct nfs_pgio_header *hdr;
+ int ret;
+
+ rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
+ if (!rw_hdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &rw_hdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pgio(desc, hdr);
+ if (ret == 0)
+ ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+ hdr->data, desc->pg_rpc_callops,
+ desc->pg_ioflags, 0);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ return ret;
+}
+
static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
const struct nfs_open_context *ctx2)
{
@@ -356,18 +834,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
- if (!nfs_match_open_context(req->wb_context, prev->wb_context))
- return false;
- if (req->wb_context->dentry->d_inode->i_flock != NULL &&
- !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context))
- return false;
- if (req->wb_pgbase != 0)
- return false;
- if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
- return false;
- if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
- return false;
- return pgio->pg_ops->pg_test(pgio, prev, req);
+ size_t size;
+
+ if (prev) {
+ if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+ return false;
+ if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+ !nfs_match_lock_context(req->wb_lock_context,
+ prev->wb_lock_context))
+ return false;
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
+ }
+ size = pgio->pg_ops->pg_test(pgio, prev, req);
+ WARN_ON_ONCE(size > req->wb_bytes);
+ if (size && size < req->wb_bytes)
+ req->wb_bytes = size;
+ return size > 0;
}
/**
@@ -381,17 +864,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
+ struct nfs_page *prev = NULL;
if (desc->pg_count != 0) {
- struct nfs_page *prev;
-
prev = nfs_list_entry(desc->pg_list.prev);
- if (!nfs_can_coalesce_requests(prev, req, desc))
- return 0;
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
+ if (!nfs_can_coalesce_requests(prev, req, desc))
+ return 0;
nfs_list_remove_request(req);
nfs_list_add_request(req, &desc->pg_list);
desc->pg_count += req->wb_bytes;
@@ -421,22 +903,72 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* @desc: destination io descriptor
* @req: request
*
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
- while (!nfs_pageio_do_add_request(desc, req)) {
- desc->pg_moreio = 1;
- nfs_pageio_doio(desc);
- if (desc->pg_error < 0)
- return 0;
- desc->pg_moreio = 0;
- if (desc->pg_recoalesce)
- return 0;
- }
+ struct nfs_page *subreq;
+ unsigned int bytes_left = 0;
+ unsigned int offset, pgbase;
+
+ nfs_page_group_lock(req);
+
+ subreq = req;
+ bytes_left = subreq->wb_bytes;
+ offset = subreq->wb_offset;
+ pgbase = subreq->wb_pgbase;
+
+ do {
+ if (!nfs_pageio_do_add_request(desc, subreq)) {
+ /* make sure pg_test call(s) did nothing */
+ WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+ WARN_ON_ONCE(subreq->wb_offset != offset);
+ WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+ nfs_page_group_unlock(req);
+ desc->pg_moreio = 1;
+ nfs_pageio_doio(desc);
+ if (desc->pg_error < 0)
+ return 0;
+ if (desc->pg_recoalesce)
+ return 0;
+ /* retry add_request for this subreq */
+ nfs_page_group_lock(req);
+ continue;
+ }
+
+ /* check for buggy pg_test call(s) */
+ WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+ WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+ WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+ bytes_left -= subreq->wb_bytes;
+ offset += subreq->wb_bytes;
+ pgbase += subreq->wb_bytes;
+
+ if (bytes_left) {
+ subreq = nfs_create_request(req->wb_context,
+ req->wb_page,
+ subreq, pgbase, bytes_left);
+ if (IS_ERR(subreq))
+ goto err_ptr;
+ nfs_lock_request(subreq);
+ subreq->wb_offset = offset;
+ subreq->wb_index = req->wb_index;
+ }
+ } while (bytes_left > 0);
+
+ nfs_page_group_unlock(req);
return 1;
+err_ptr:
+ desc->pg_error = PTR_ERR(subreq);
+ nfs_page_group_unlock(req);
+ return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -449,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
desc->pg_count = 0;
desc->pg_base = 0;
desc->pg_recoalesce = 0;
+ desc->pg_moreio = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
@@ -535,3 +1068,13 @@ void nfs_destroy_nfspagecache(void)
kmem_cache_destroy(nfs_page_cachep);
}
+static const struct rpc_call_ops nfs_pgio_common_ops = {
+ .rpc_call_prepare = nfs_pgio_prepare,
+ .rpc_call_done = nfs_pgio_result,
+ .rpc_release = nfs_pgio_release,
+};
+
+const struct nfs_pageio_ops nfs_pgio_rw_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_pgios,
+};
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d75d938d36c..6fdcd233d6f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -662,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
*/
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
- return (s32)s1 - (s32)s2 > 0;
+ return (s32)(s1 - s2) > 0;
+}
+
+static void
+pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new,
+ struct list_head *free_me_list)
+{
+ if (nfs4_stateid_match_other(&lo->plh_stateid, new))
+ return;
+ /* Layout is new! Kill existing layout segments */
+ pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
}
/* update lo->plh_stateid with new if is more recent */
@@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode;
+ LIST_HEAD(free_me);
int status = 0;
/* Inject layout blob into I/O device driver */
@@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget_reply;
}
+ /* Check that the new stateid matches the old stateid */
+ pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);
@@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me);
return lseg;
out:
return ERR_PTR(status);
@@ -1373,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
-
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
@@ -1402,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
{
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_write_mds(pgio);
- return;
- }
-
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1419,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_read(pgio, inode, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (pgio->pg_lseg == NULL)
- return nfs_generic_pg_test(pgio, prev, req);
+ unsigned int size;
+ u64 seg_end, req_start, seg_left;
+
+ size = nfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
/*
- * Test if a nfs_page is fully contained in the pnfs_layout_range.
- * Note that this test makes several assumptions:
- * - that the previous nfs_page in the struct nfs_pageio_descriptor
- * is known to lie within the range.
- * - that the nfs_page being tested is known to be contiguous with the
- * previous nfs_page.
- * - Layout ranges are page aligned, so we only have to test the
- * start offset of the request.
+ * 'size' contains the number of bytes left in the current page (up
+ * to the original size asked for in @req->wb_bytes).
+ *
+ * Calculate how many bytes are left in the layout segment
+ * and if there are less bytes than 'size', return that instead.
*
* Please also note that 'end_offset' is actually the offset of the
* first byte that lies outside the pnfs_layout_range. FIXME?
*
*/
- return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length);
+ if (pgio->pg_lseg) {
+ seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
+ pgio->pg_lseg->pls_range.length);
+ req_start = req_offset(req);
+ WARN_ON_ONCE(req_start > seg_end);
+ /* start of request is past the last byte of this segment */
+ if (req_start >= seg_end)
+ return 0;
+
+ /* adjust 'size' iff there are fewer bytes left in the
+ * segment than what nfs_generic_pg_test returned */
+ seg_left = seg_end - req_start;
+ if (seg_left < size)
+ size = (unsigned int)seg_left;
+ }
+
+ return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
@@ -1481,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+ nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1504,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1523,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1539,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1548,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_writedata_release(data);
+ nfs_pgio_data_release(data);
}
static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
@@ -1574,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
}
static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr, int how)
{
- struct nfs_write_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_write_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_writehdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_write_header *whdr;
+ struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
- whdr = nfs_writehdr_alloc();
+ whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!whdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg);
@@ -1619,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+ pnfs_do_write(desc, hdr, desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
@@ -1640,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read(&pgio, inode, compl_ops);
+ nfs_pageio_init_read(&pgio, inode, true, compl_ops);
pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1659,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1678,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1694,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1703,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_readdata_release(data);
+ nfs_pgio_data_release(data);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
@@ -1732,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
}
static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
- struct nfs_read_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_read_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_readhdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_read_header *rhdr;
+ struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr;
int ret;
- rhdr = nfs_readhdr_alloc();
+ rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!rhdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
@@ -1778,18 +1765,27 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+ pnfs_do_read(desc, hdr);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+static void pnfs_clear_layoutcommitting(struct inode *inode)
+{
+ unsigned long *bitlock = &NFS_I(inode)->flags;
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_atomic();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
/*
* There can be multiple RW segments.
*/
@@ -1807,7 +1803,6 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
struct pnfs_layout_segment *lseg, *tmp;
- unsigned long *bitlock = &NFS_I(inode)->flags;
/* Matched by references in pnfs_set_layoutcommit */
list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
@@ -1815,9 +1810,7 @@ static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *lis
pnfs_put_lseg(lseg);
}
- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
- smp_mb__after_clear_bit();
- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+ pnfs_clear_layoutcommitting(inode);
}
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
@@ -1827,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
@@ -1881,43 +1874,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs4_layoutcommit_data *data;
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
- int status = 0;
-
- dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
+ int status;
- if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ if (!pnfs_layoutcommit_outstanding(inode))
return 0;
- /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data) {
- status = -ENOMEM;
- goto out;
- }
-
- if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
- goto out_free;
+ dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
+ status = -EAGAIN;
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
- if (!sync) {
- status = -EAGAIN;
- goto out_free;
- }
- status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ if (!sync)
+ goto out;
+ status = wait_on_bit_lock(&nfsi->flags,
+ NFS_INO_LAYOUTCOMMITTING,
+ nfs_wait_bit_killable,
+ TASK_KILLABLE);
if (status)
- goto out_free;
+ goto out;
}
- INIT_LIST_HEAD(&data->lseg_list);
+ status = -ENOMEM;
+ /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
+ data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (!data)
+ goto clear_layoutcommitting;
+
+ status = 0;
spin_lock(&inode->i_lock);
- if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
- clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
- spin_unlock(&inode->i_lock);
- wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
- goto out_free;
- }
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ goto out_unlock;
+ INIT_LIST_HEAD(&data->lseg_list);
pnfs_list_write_lseg(inode, &data->lseg_list);
end_pos = nfsi->layout->plh_lwb;
@@ -1940,8 +1927,11 @@ out:
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
-out_free:
+out_unlock:
+ spin_unlock(&inode->i_lock);
kfree(data);
+clear_layoutcommitting:
+ pnfs_clear_layoutcommitting(inode);
goto out;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a4f41810a7f..4fb309a2b4c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
- enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
- enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+ enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+ enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
- const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
- int, const struct nfs_pgio_completion_ops *);
-
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -275,7 +271,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
atomic_inc(&lseg->pls_refcount);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
}
return lseg;
}
@@ -359,6 +355,15 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode)
PNFS_LAYOUTRET_ON_SETATTR;
}
+static inline bool
+pnfs_layoutcommit_outstanding(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags) != 0 ||
+ test_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags) != 0;
+}
+
static inline int pnfs_return_layout(struct inode *ino)
{
struct nfs_inode *nfsi = NFS_I(ino);
@@ -452,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
static inline int
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
@@ -515,6 +508,13 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
return false;
}
+static inline bool
+pnfs_layoutcommit_outstanding(struct inode *inode)
+{
+ return false;
+}
+
+
static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
return NULL;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index fddbba2d9ef..c171ce1a8a3 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -357,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
}
static int
-nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .old_name = old_name,
- .new_dir = NFS_FH(new_dir),
- .new_name = new_name,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
- .rpc_argp = &arg,
- };
- int status;
-
- dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
- nfs_mark_for_revalidate(old_dir);
- nfs_mark_for_revalidate(new_dir);
- dprintk("NFS reply rename: %d\n", status);
- return status;
-}
-
-static int
nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_linkargs arg = {
@@ -602,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0;
}
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -618,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
return 0;
}
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -638,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
- return 0;
-}
-
static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
BUG();
@@ -745,7 +715,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.unlink_setup = nfs_proc_unlink_setup,
.unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
.unlink_done = nfs_proc_unlink_done,
- .rename = nfs_proc_rename,
.rename_setup = nfs_proc_rename_setup,
.rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
.rename_done = nfs_proc_rename_done,
@@ -759,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs2_decode_dirent,
+ .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
.read_setup = nfs_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs_proc_read_rpc_prepare,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 31db5c366b8..e818a475ca6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,85 +24,24 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
-struct nfs_read_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
{
- struct nfs_read_header *rhdr;
-
- rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
- if (rhdr) {
- struct nfs_pgio_header *hdr = &rhdr->header;
-
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- }
- return rhdr;
+ return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
{
- struct nfs_read_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
-{
- struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
-
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
-
-void nfs_readdata_release(struct nfs_read_data *rdata)
-{
- struct nfs_pgio_header *hdr = rdata->header;
- struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
-
- put_nfs_open_context(rdata->args.context);
- if (rdata->pages.pagevec != rdata->pages.page_array)
- kfree(rdata->pages.pagevec);
- if (rdata == &read_header->rpc_data) {
- rdata->header = NULL;
- rdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(rdata);
-}
-EXPORT_SYMBOL_GPL(nfs_readdata_release);
static
int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page)
}
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
- NFS_SERVER(inode)->rsize, 0);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+ server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, inode, page, 0, len);
+ new = nfs_create_request(ctx, page, NULL, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,20 +105,31 @@ static void nfs_readpage_release(struct nfs_page *req)
{
struct inode *d_inode = req->wb_context->dentry->d_inode;
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+ (long long)req_offset(req));
- unlock_page(req->wb_page);
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
- dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+ unlock_page(req->wb_page);
+ }
+
+ dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
}
-/* Note io was page aligned */
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{
+ if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+ SetPageUptodate(req->wb_page);
+}
+
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
+ unsigned long start = req->wb_pgbase;
+ unsigned long end = req->wb_pgbase + req->wb_bytes;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
- if (bytes > hdr->good_bytes)
- zero_user(page, 0, PAGE_SIZE);
- else if (hdr->good_bytes - bytes < PAGE_SIZE)
- zero_user_segment(page,
- hdr->good_bytes & ~PAGE_MASK,
- PAGE_SIZE);
+ /* note: regions of the page not covered by a
+ * request are zeroed in nfs_readpage_async /
+ * readpage_async_filler */
+ if (bytes > hdr->good_bytes) {
+ /* nothing in this request was good, so zero
+ * the full extent of the request */
+ zero_user_segment(page, start, end);
+
+ } else if (hdr->good_bytes - bytes < req->wb_bytes) {
+ /* part of this request has good bytes, but
+ * not all. zero the bad bytes */
+ start += hdr->good_bytes - bytes;
+ WARN_ON(start < req->wb_pgbase);
+ zero_user_segment(page, start, end);
+ }
}
bytes += req->wb_bytes;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (bytes <= hdr->good_bytes)
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
} else
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
@@ -203,95 +172,14 @@ out:
hdr->release(hdr);
}
-int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = clnt,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | swap_flags | flags,
- };
-
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
- "offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- rpc_put_task(task);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
-/*
- * Set up the NFS read request struct
- */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
- unsigned int count, unsigned int offset)
-{
- struct nfs_page *req = data->header->req;
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.eof = 0;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_read(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
-}
-
-static int
-nfs_do_multiple_reads(struct list_head *head,
- const struct rpc_call_ops *call_ops)
-{
- struct nfs_read_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_read(data, call_ops);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ task_setup_data->flags |= swap_flags;
+ NFS_PROTO(inode)->read_setup(data, msg);
}
static void
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
.completion = nfs_read_completion,
};
-static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_read_data, list);
- list_del(&data->list);
- nfs_readdata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple requests to fill a single page.
- *
- * We optimize to reduce the number of read operations on the wire. If we
- * detect that we're reading a page, or an area of a page, that is past the
- * end of file, we do not generate NFS read operations but just clear the
- * parts of the page that would have come back zero from the server anyway.
- *
- * We rely on the cached value of i_size to make this determination; another
- * client can fill pages on the server past our cached end-of-file, but we
- * won't see the new data until our attribute cache is updated. This is more
- * or less conventional NFS client behavior.
- */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_read_data *data;
- size_t rsize = desc->pg_bsize, nbytes;
- unsigned int offset;
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes,rsize);
-
- data = nfs_readdata_alloc(hdr, 1);
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_read_rpcsetup(data, len, offset);
- list_add(&data->list, &hdr->rpc_list);
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
-
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_read_data *data;
- struct list_head *head = &desc->pg_list;
-
- data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
-
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- nfs_read_rpcsetup(data, desc->pg_count, 0);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc, hdr);
- return nfs_pagein_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_pagein);
-
-static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_read_header *rhdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- rhdr = nfs_readhdr_alloc();
- if (!rhdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &rhdr->header;
- nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&hdr->rpc_list,
- desc->pg_rpc_callops);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_read_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
-};
-
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct inode *inode = data->header->inode;
- int status;
-
- dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
- task->tk_status);
-
- status = NFS_PROTO(inode)->read_done(task, data);
+ int status = NFS_PROTO(inode)->read_done(task, data);
if (status != 0)
return status;
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_readargs *argp = &data->args;
- struct nfs_readres *resp = &data->res;
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
/* This is a short read! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
rpc_restart_call_prepare(task);
}
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
- /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
- if (nfs_readpage_result(task, data) != 0)
- return;
- if (task->tk_status < 0)
- nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
- else if (data->res.eof) {
+ if (data->res.eof) {
loff_t bound;
bound = data->args.offset + data->res.count;
@@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
nfs_readpage_retry(task, data);
}
-static void nfs_readpage_release_common(void *calldata)
-{
- nfs_readdata_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_read_data *data = calldata;
- int err;
- err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
- if (err)
- rpc_exit(task, err);
-}
-
-static const struct rpc_call_ops nfs_read_common_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_common,
- .rpc_release = nfs_readpage_release_common,
-};
-
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
@@ -592,7 +325,6 @@ static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *new;
unsigned int len;
int error;
@@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(desc->ctx, inode, page, 0, len);
+ new = nfs_create_request(desc->ctx, page, NULL, 0, len);
if (IS_ERR(new))
goto out_error;
@@ -630,9 +362,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
unsigned long npages;
int ret = -ESTALE;
- dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+ dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
+ (unsigned long long)NFS_FILEID(inode),
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
@@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
@@ -671,7 +404,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_read_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
@@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void)
{
kmem_cache_destroy(nfs_rdata_cachep);
}
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+ .rw_mode = FMODE_READ,
+ .rw_alloc_header = nfs_readhdr_alloc,
+ .rw_free_header = nfs_readhdr_free,
+ .rw_done = nfs_readpage_done,
+ .rw_result = nfs_readpage_result,
+ .rw_initiate = nfs_initiate_read,
+};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 910ed906eb8..084af1060d7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2180,11 +2180,23 @@ out_no_address:
return -EINVAL;
}
+#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+ | NFS_MOUNT_SECURE \
+ | NFS_MOUNT_TCP \
+ | NFS_MOUNT_VER3 \
+ | NFS_MOUNT_KERBEROS \
+ | NFS_MOUNT_NONLM \
+ | NFS_MOUNT_BROKEN_SUID \
+ | NFS_MOUNT_STRICTLOCK \
+ | NFS_MOUNT_UNSHARED \
+ | NFS_MOUNT_NORESVPORT \
+ | NFS_MOUNT_LEGACY_INTERFACE)
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
{
- if (data->flags != nfss->flags ||
+ if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
data->version != nfss->nfs_client->rpc_ops->version ||
@@ -2215,6 +2227,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
u32 nfsvers = nfss->nfs_client->rpc_ops->version;
+ sync_filesystem(sb);
+
/*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
@@ -2246,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
data->version = nfsvers;
data->minorversion = nfss->nfs_client->cl_minorversion;
+ data->net = current->nsproxy->net_ns;
memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
data->nfs_server.addrlen);
@@ -2345,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
nfs_initialise_sb(sb);
}
-#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
- | NFS_MOUNT_SECURE \
- | NFS_MOUNT_TCP \
- | NFS_MOUNT_VER3 \
- | NFS_MOUNT_KERBEROS \
- | NFS_MOUNT_NONLM \
- | NFS_MOUNT_BROKEN_SUID \
- | NFS_MOUNT_STRICTLOCK \
- | NFS_MOUNT_UNSHARED \
- | NFS_MOUNT_NORESVPORT \
- | NFS_MOUNT_LEGACY_INTERFACE)
-
static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
{
const struct nfs_server *a = s->s_fs_info;
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 6b3f2535a3e..bb6ed810fa6 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -13,7 +13,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static ctl_table nfs_cb_sysctls[] = {
+static struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
@@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_dir[] = {
+static struct ctl_table nfs_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_root[] = {
+static struct ctl_table nfs_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 11d78944de7..de54129336c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,6 +14,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/namei.h>
+#include <linux/fsnotify.h>
#include "internal.h"
#include "nfs4_fs.h"
@@ -353,8 +354,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
return;
}
- if (task->tk_status != 0)
- nfs_cancel_async_unlink(old_dentry);
+ if (data->complete)
+ data->complete(task, data);
}
/**
@@ -399,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = {
*
* It's expected that valid references to the dentries and inodes are held
*/
-static struct rpc_task *
+struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
- struct dentry *old_dentry, struct dentry *new_dentry)
+ struct dentry *old_dentry, struct dentry *new_dentry,
+ void (*complete)(struct rpc_task *, struct nfs_renamedata *))
{
struct nfs_renamedata *data;
struct rpc_message msg = { };
@@ -438,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
data->new_dentry = dget(new_dentry);
nfs_fattr_init(&data->old_fattr);
nfs_fattr_init(&data->new_fattr);
+ data->complete = complete;
/* set up nfs_renameargs */
data->args.old_dir = NFS_FH(old_dir);
@@ -456,6 +459,27 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
return rpc_run_task(&task_setup_data);
}
+/*
+ * Perform tasks needed when a sillyrename is done such as cancelling the
+ * queued async unlink if it failed.
+ */
+static void
+nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ struct dentry *dentry = data->old_dentry;
+
+ if (task->tk_status != 0) {
+ nfs_cancel_async_unlink(dentry);
+ return;
+ }
+
+ /*
+ * vfs_unlink and the like do not issue this when a file is
+ * sillyrenamed, so do it here.
+ */
+ fsnotify_nameremove(dentry, 0);
+}
+
#define SILLYNAME_PREFIX ".nfs"
#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
@@ -548,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
}
/* run the rename task, undo unlink if it fails */
- task = nfs_async_rename(dir, dir, dentry, sdentry);
+ task = nfs_async_rename(dir, dir, dentry, sdentry,
+ nfs_complete_sillyrename);
if (IS_ERR(task)) {
error = -EBUSY;
nfs_cancel_async_unlink(dentry);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c1d548211c3..5e2f1030454 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,10 +42,11 @@
* Local function declarations
*/
static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -70,76 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-struct nfs_write_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
{
- struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
-
- if (p) {
- struct nfs_pgio_header *hdr = &p->header;
+ struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+ if (p)
memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- hdr->verf = &p->verf;
- }
return p;
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
{
- struct nfs_write_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
-{
- struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
mempool_free(whdr, nfs_wdata_mempool);
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
-
-void nfs_writedata_release(struct nfs_write_data *wdata)
-{
- struct nfs_pgio_header *hdr = wdata->header;
- struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
-
- put_nfs_open_context(wdata->args.context);
- if (wdata->pages.pagevec != wdata->pages.page_array)
- kfree(wdata->pages.pagevec);
- if (wdata == &write_header->rpc_data) {
- wdata->header = NULL;
- wdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(wdata);
-}
-EXPORT_SYMBOL_GPL(nfs_writedata_release);
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
@@ -148,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
@@ -161,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
- req = freq;
+ req = freq->wb_head;
break;
}
}
}
- if (req)
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+
kref_get(&req->wb_kref);
+ }
return req;
}
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -211,18 +170,78 @@ static void nfs_set_pageerror(struct page *page)
nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}
+/*
+ * nfs_page_group_search_locked
+ * @head - head request of page group
+ * @page_offset - offset into page
+ *
+ * Search page group with head @head to find a request that contains the
+ * page offset @page_offset.
+ *
+ * Returns a pointer to the first matching nfs request, or NULL if no
+ * match is found.
+ *
+ * Must be called with the page group lock held
+ */
+static struct nfs_page *
+nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
+{
+ struct nfs_page *req;
+
+ WARN_ON_ONCE(head != head->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
+
+ req = head;
+ do {
+ if (page_offset >= req->wb_pgbase &&
+ page_offset < (req->wb_pgbase + req->wb_bytes))
+ return req;
+
+ req = req->wb_this_page;
+ } while (req != head);
+
+ return NULL;
+}
+
+/*
+ * nfs_page_group_covers_page
+ * @head - head request of page group
+ *
+ * Return true if the page group with head @head covers the whole page,
+ * returns false otherwise
+ */
+static bool nfs_page_group_covers_page(struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+ unsigned int pos = 0;
+ unsigned int len = nfs_page_length(req->wb_page);
+
+ nfs_page_group_lock(req);
+
+ do {
+ tmp = nfs_page_group_search_locked(req->wb_head, pos);
+ if (tmp) {
+ /* no way this should happen */
+ WARN_ON_ONCE(tmp->wb_pgbase != pos);
+ pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
+ }
+ } while (tmp && pos < len);
+
+ nfs_page_group_unlock(req);
+ WARN_ON_ONCE(pos > len);
+ return pos == len;
+}
+
/* We can set the PG_uptodate flag if we see that a write request
* covers the full page.
*/
-static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+static void nfs_mark_uptodate(struct nfs_page *req)
{
- if (PageUptodate(page))
- return;
- if (base != 0)
+ if (PageUptodate(req->wb_page))
return;
- if (count != nfs_page_length(page))
+ if (!nfs_page_group_covers_page(req))
return;
- SetPageUptodate(page);
+ SetPageUptodate(req->wb_page);
}
static int wb_priority(struct writeback_control *wbc)
@@ -258,46 +277,259 @@ static void nfs_set_page_writeback(struct page *page)
}
}
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = page_file_mapping(req->wb_page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
- end_page_writeback(page);
+ if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+ return;
+
+ end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ * @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+ clear_bit(PG_TEARDOWN, &req->wb_flags);
+ clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+ clear_bit(PG_UPTODATE, &req->wb_flags);
+ clear_bit(PG_WB_END, &req->wb_flags);
+ clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head - head request of page group, must be holding head lock
+ * @req - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ * and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req, bool nonblock)
+ __releases(&inode->i_lock)
+{
+ struct nfs_page *tmp;
+ int ret;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+ nfs_unlock_request(tmp);
+
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+ /* grab a ref on the request that will be waited on */
+ kref_get(&req->wb_kref);
+
+ nfs_page_group_unlock(head);
+ spin_unlock(&inode->i_lock);
+
+ /* release ref from nfs_page_find_head_request_locked */
+ nfs_release_request(head);
+
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
+ nfs_release_request(req);
+
+ return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+ struct nfs_page *old_head)
+{
+ while (destroy_list) {
+ struct nfs_page *subreq = destroy_list;
+
+ destroy_list = (subreq->wb_this_page == old_head) ?
+ NULL : subreq->wb_this_page;
+
+ WARN_ON_ONCE(old_head != subreq->wb_head);
+
+ /* make sure old group is not used */
+ subreq->wb_head = subreq;
+ subreq->wb_this_page = subreq;
+
+ nfs_clear_request_commit(subreq);
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_request(subreq);
+
+ if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ /* release ref on old head request */
+ nfs_release_request(old_head);
+
+ nfs_page_group_clear_bits(subreq);
+
+ /* release the PG_INODE_REF reference */
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ nfs_release_request(subreq);
+ else
+ WARN_ON_ONCE(1);
+ } else {
+ WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+ /* zombie requests have already released the last
+ * reference and were waiting on the rest of the
+ * group to complete. Since it's no longer part of a
+ * group, simply free the request */
+ nfs_page_group_clear_bits(subreq);
+ nfs_free_request(subreq);
+ }
+ }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ * a locked reference, cancelling any pending
+ * operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *head, *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int total_bytes;
int ret;
+try_again:
+ total_bytes = 0;
+
+ WARN_ON_ONCE(destroy_list);
+
spin_lock(&inode->i_lock);
- for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req))
- break;
- /* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request() will always
- * succeed provided that someone hasn't already marked the
- * request as dirty (in which case we don't care).
- */
+
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+ if (!head) {
spin_unlock(&inode->i_lock);
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
- if (ret != 0)
+ return NULL;
+ }
+
+ /* lock each request in the page group */
+ nfs_page_group_lock(head);
+ subreq = head;
+ do {
+ /*
+ * Subrequests are always contiguous, non overlapping
+ * and in order. If not, it's a programming error.
+ */
+ WARN_ON_ONCE(subreq->wb_offset !=
+ (head->wb_offset + total_bytes));
+
+ /* keep track of how many bytes this group covers */
+ total_bytes += subreq->wb_bytes;
+
+ if (!nfs_lock_request(subreq)) {
+ /* releases page group bit lock and
+ * inode spin lock and all references */
+ ret = nfs_unroll_locks_and_wait(inode, head,
+ subreq, nonblock);
+
+ if (ret == 0)
+ goto try_again;
+
return ERR_PTR(ret);
- spin_lock(&inode->i_lock);
+ }
+
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_list_remove_request(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+
+ /* change head request to cover whole range that
+ * the former page group covered */
+ head->wb_bytes = total_bytes;
}
+
+ /*
+ * prepare head request to be added to new pgio descriptor
+ */
+ nfs_page_group_clear_bits(head);
+
+ /*
+ * some part of the group was still on the inode list - otherwise
+ * the group wouldn't be involved in async write.
+ * grab a reference for the head request, iff it needs one.
+ */
+ if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ kref_get(&head->wb_kref);
+
+ nfs_page_group_unlock(head);
+
+ /* drop lock to clear_request_commit the head req and clean up
+ * requests on destroy list */
spin_unlock(&inode->i_lock);
- return req;
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+ /* clean up commit list state */
+ nfs_clear_request_commit(head);
+
+ /* still holds ref on head from nfs_page_find_head_request_locked
+ * and still has lock on head from lock loop */
+ return head;
}
/*
@@ -310,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page, nonblock);
+ req = nfs_lock_and_join_requests(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -354,10 +586,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
- NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
- page->mapping->host,
- wb_priority(wbc),
- &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+ false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -400,12 +630,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+ &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(bitlock, NFS_INO_FLUSHING);
if (err < 0)
@@ -425,6 +656,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
/* Lock the request! */
nfs_lock_request(req);
@@ -441,6 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
+ /* this a head request for a page group - mark it as having an
+ * extra reference so sub groups can follow suit */
+ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -452,16 +688,23 @@ static void nfs_inode_remove_request(struct nfs_page *req)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *head;
- spin_lock(&inode->i_lock);
- if (likely(!PageSwapCache(req->wb_page))) {
- set_page_private(req->wb_page, 0);
- ClearPagePrivate(req->wb_page);
- clear_bit(PG_MAPPED, &req->wb_flags);
+ if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+ head = req->wb_head;
+
+ spin_lock(&inode->i_lock);
+ if (likely(!PageSwapCache(head->wb_page))) {
+ set_page_private(head->wb_page, 0);
+ ClearPagePrivate(head->wb_page);
+ clear_bit(PG_MAPPED, &head->wb_flags);
+ }
+ nfsi->npages--;
+ spin_unlock(&inode->i_lock);
}
- nfsi->npages--;
- spin_unlock(&inode->i_lock);
- nfs_release_request(req);
+
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ nfs_release_request(req);
}
static void
@@ -583,7 +826,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
return data->header->lseg == NULL;
@@ -614,7 +857,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
return 0;
}
@@ -645,7 +888,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
- memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+ memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
@@ -653,7 +896,7 @@ remove_req:
nfs_inode_remove_request(req);
next:
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
nfs_release_request(req);
}
out:
@@ -661,7 +904,7 @@ out:
}
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-static unsigned long
+unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return cinfo->mds->ncommit;
@@ -718,7 +961,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
}
#else
-static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -754,10 +997,14 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
+ /* should be handled by nfs_flush_incompatible */
+ WARN_ON_ONCE(req->wb_head != req);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
rqend = req->wb_offset + req->wb_bytes;
/*
* Tell the caller to flush out the request if
@@ -819,7 +1066,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, inode, page, offset, bytes);
+ req = nfs_create_request(ctx, page, NULL, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -837,7 +1084,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
return PTR_ERR(req);
/* Update file length */
nfs_grow_file(page, offset, count);
- nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
@@ -858,11 +1105,13 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_request(page);
+ req = nfs_page_find_head_request(page);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx;
+ /* for now, flush if more than 1 request in page_group */
+ do_flush |= req->wb_this_page != req;
if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid;
@@ -909,11 +1158,18 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
*/
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
if (nfs_have_delegated_attributes(inode))
goto out;
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ return false;
+ smp_rmb();
+ if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
return false;
out:
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return false;
return PageUptodate(page) != 0;
}
@@ -922,19 +1178,20 @@ out:
* extend the write to cover the entire page in order to avoid fragmentation
* inefficiencies.
*
- * If the file is opened for synchronous writes or if we have a write delegation
- * from the server then we can just skip the rest of the checks.
+ * If the file is opened for synchronous writes then we can just skip the rest
+ * of the checks.
*/
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
if (file->f_flags & O_DSYNC)
return 0;
+ if (!nfs_write_pageuptodate(page, inode))
+ return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
- if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL ||
- (inode->i_flock->fl_start == 0 &&
+ if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
inode->i_flock->fl_end == OFFSET_MAX &&
- inode->i_flock->fl_type != F_RDLCK)))
+ inode->i_flock->fl_type != F_RDLCK))
return 1;
return 0;
}
@@ -984,126 +1241,17 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = clnt,
- .task = &data->task,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | flags,
- .priority = priority,
- };
- int ret = 0;
-
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->write_setup(data, &msg);
- dprintk("NFS: %5u initiated write call "
- "(req %s/%lld, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ task_setup_data->priority = priority;
+ NFS_PROTO(inode)->write_setup(data, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
- &task_setup_data.rpc_client, &msg, data);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
- rpc_put_task(task);
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
-
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
- unsigned int count, unsigned int offset,
- int how, struct nfs_commit_info *cinfo)
-{
- struct nfs_page *req = data->header->req;
-
- /* Set up the RPC argument and reply structs
- * NB: take care not to mess about with data->commit et al. */
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- /* pnfs_set_layoutcommit needs this */
- data->mds_offset = data->args.offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
- data->args.stable = NFS_UNSTABLE;
- switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
- case 0:
- break;
- case FLUSH_COND_STABLE:
- if (nfs_reqs_to_commit(cinfo))
- break;
- default:
- data->args.stable = NFS_FILE_SYNC;
- }
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_write(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
-}
-
-static int nfs_do_multiple_writes(struct list_head *head,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct nfs_write_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_write(data, call_ops, how);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ &task_setup_data->rpc_client, msg, data);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1114,7 +1262,7 @@ static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1134,173 +1282,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.completion = nfs_write_completion,
};
-static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_write_data, list);
- list_del(&data->list);
- nfs_writedata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple small requests to write out a single
- * contiguous dirty area on one page.
- */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_write_data *data;
- size_t wsize = desc->pg_bsize, nbytes;
- unsigned int offset;
- int requests = 0;
- struct nfs_commit_info cinfo;
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
- desc->pg_count > wsize))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes, wsize);
-
- data = nfs_writedata_alloc(hdr, 1);
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- requests++;
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-/*
- * Create an RPC task for the given write request and kick it.
- * The page must have been locked by the caller.
- *
- * It may happen that the page we're passed is not marked dirty.
- * This is the case if nfs_updatepage detects a conflicting request
- * that has been written but not committed.
- */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_write_data *data;
- struct list_head *head = &desc->pg_list;
- struct nfs_commit_info cinfo;
-
- data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
- /* Set up the argument struct */
- nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc, hdr);
- return nfs_flush_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_flush);
-
-static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_write_header *whdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- whdr = nfs_writehdr_alloc();
- if (!whdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &whdr->header;
- nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&hdr->rpc_list,
- desc->pg_rpc_callops,
- desc->pg_ioflags);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_write_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
-};
-
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
- NFS_SERVER(inode)->wsize, ioflags);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+ server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
- int err;
- err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
- if (err)
- rpc_exit(task, err);
-}
-
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_commit_data *data = calldata;
@@ -1308,23 +1313,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- * writebacks since the page->count is kept > 1 for as long
- * as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_common(void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
@@ -1333,34 +1323,46 @@ static void nfs_writeback_release_common(void *calldata)
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
; /* Do nothing */
else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
- memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
- else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+ memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+ else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
spin_unlock(&hdr->lock);
}
- nfs_writedata_release(data);
}
-static const struct rpc_call_ops nfs_write_common_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_common,
- .rpc_release = nfs_writeback_release_common,
-};
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct nfs_writeargs *argp = &data->args;
- struct nfs_writeres *resp = &data->res;
- struct inode *inode = data->header->inode;
int status;
- dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
- task->tk_pid, task->tk_status);
-
/*
* ->write_done will attempt to use post-op attributes to detect
* conflicting writes by other clients. A strict interpretation
@@ -1370,11 +1372,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
- return;
- nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+ return status;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
- if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1390,18 +1392,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
- resp->verf->committed, argp->stable);
+ data->res.verf->committed, data->args.stable);
complain = jiffies + 300 * HZ;
}
}
#endif
- if (task->tk_status < 0)
- nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
- else if (resp->count < argp->count) {
+
+ /* Deal with the suid/sgid bit corner case */
+ if (nfs_should_remove_suid(inode))
+ nfs_mark_for_revalidate(inode);
+ return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
+
+ if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
- nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1452,7 +1467,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
clear_bit(NFS_INO_COMMIT, &nfsi->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
@@ -1606,9 +1621,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
nfs_list_remove_request(req);
nfs_clear_page_commit(req->wb_page);
- dprintk("NFS: commit (%s/%lld %d@%lld)",
+ dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
@@ -1782,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
struct nfs_page *req;
int ret = 0;
- for (;;) {
- wait_on_page_writeback(page);
- req = nfs_page_find_request(page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req)) {
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_and_release_request(req);
- break;
- }
- ret = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (ret < 0)
- break;
+ wait_on_page_writeback(page);
+
+ /* blocking call to cancel all requests and join to a single (head)
+ * request */
+ req = nfs_lock_and_join_requests(page, false);
+
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ } else if (req) {
+ /* all requests from this page have been cancelled by
+ * nfs_lock_and_join_requests, so just remove the head
+ * request from the inode / page_private pointer and
+ * release it */
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_and_release_request(req);
}
+
return ret;
}
@@ -1868,7 +1884,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_write_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
@@ -1930,3 +1946,12 @@ void nfs_destroy_writepagecache(void)
kmem_cache_destroy(nfs_wdata_cachep);
}
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+ .rw_mode = FMODE_WRITE,
+ .rw_alloc_header = nfs_writehdr_alloc,
+ .rw_free_header = nfs_writehdr_free,
+ .rw_release = nfs_writeback_release_common,
+ .rw_done = nfs_writeback_done,
+ .rw_result = nfs_writeback_result,
+ .rw_initiate = nfs_initiate_write,
+};