aboutsummaryrefslogtreecommitdiff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/Kconfig31
-rw-r--r--fs/nfs/Makefile15
-rw-r--r--fs/nfs/blocklayout/blocklayout.c87
-rw-r--r--fs/nfs/blocklayout/blocklayout.h3
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c4
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c12
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/cache_lib.c12
-rw-r--r--fs/nfs/cache_lib.h2
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/callback.h3
-rw-r--r--fs/nfs/callback_proc.c98
-rw-r--r--fs/nfs/callback_xdr.c54
-rw-r--r--fs/nfs/client.c23
-rw-r--r--fs/nfs/delegation.c291
-rw-r--r--fs/nfs/delegation.h6
-rw-r--r--fs/nfs/dir.c507
-rw-r--r--fs/nfs/direct.c706
-rw-r--r--fs/nfs/dns_resolve.c101
-rw-r--r--fs/nfs/file.c242
-rw-r--r--fs/nfs/filelayout/Makefile5
-rw-r--r--fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c)302
-rw-r--r--fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h)9
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c)59
-rw-r--r--fs/nfs/fscache.c202
-rw-r--r--fs/nfs/fscache.h18
-rw-r--r--fs/nfs/getroot.c8
-rw-r--r--fs/nfs/idmap.c319
-rw-r--r--fs/nfs/inode.c443
-rw-r--r--fs/nfs/internal.h129
-rw-r--r--fs/nfs/mount_clnt.c14
-rw-r--r--fs/nfs/namespace.c27
-rw-r--r--fs/nfs/nfs2xdr.c33
-rw-r--r--fs/nfs/nfs3acl.c330
-rw-r--r--fs/nfs/nfs3proc.c153
-rw-r--r--fs/nfs/nfs3super.c3
-rw-r--r--fs/nfs/nfs3xdr.c34
-rw-r--r--fs/nfs/nfs4_fs.h168
-rw-r--r--fs/nfs/nfs4client.c542
-rw-r--r--fs/nfs/nfs4file.c25
-rw-r--r--fs/nfs/nfs4getroot.c4
-rw-r--r--fs/nfs/nfs4namespace.c250
-rw-r--r--fs/nfs/nfs4proc.c3011
-rw-r--r--fs/nfs/nfs4session.c145
-rw-r--r--fs/nfs/nfs4session.h51
-rw-r--r--fs/nfs/nfs4state.c656
-rw-r--r--fs/nfs/nfs4super.c48
-rw-r--r--fs/nfs/nfs4sysctl.c6
-rw-r--r--fs/nfs/nfs4trace.c17
-rw-r--r--fs/nfs/nfs4trace.h1148
-rw-r--r--fs/nfs/nfs4xdr.c759
-rw-r--r--fs/nfs/nfstrace.c9
-rw-r--r--fs/nfs/nfstrace.h730
-rw-r--r--fs/nfs/objlayout/objio_osd.c27
-rw-r--r--fs/nfs/objlayout/objlayout.c28
-rw-r--r--fs/nfs/objlayout/objlayout.h10
-rw-r--r--fs/nfs/pagelist.c716
-rw-r--r--fs/nfs/pnfs.c541
-rw-r--r--fs/nfs/pnfs.h73
-rw-r--r--fs/nfs/pnfs_dev.c9
-rw-r--r--fs/nfs/proc.c69
-rw-r--r--fs/nfs/read.c419
-rw-r--r--fs/nfs/super.c491
-rw-r--r--fs/nfs/sysctl.c6
-rw-r--r--fs/nfs/unlink.c108
-rw-r--r--fs/nfs/write.c1015
66 files changed, 10558 insertions, 4822 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 13ca196385f..3dece03f2fc 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -104,20 +104,29 @@ config NFS_V4_1
If unsure, say N.
+config NFS_V4_2
+ bool "NFS client support for NFSv4.2"
+ depends on NFS_V4_1
+ help
+ This option enables support for minor version 2 of the NFSv4 protocol
+ in the kernel's NFS client.
+
+ If unsure, say N.
+
config PNFS_FILE_LAYOUT
tristate
depends on NFS_V4_1
- default m
+ default NFS_V4
config PNFS_BLOCK
tristate
depends on NFS_V4_1 && BLK_DEV_DM
- default m
+ default NFS_V4
config PNFS_OBJLAYOUT
tristate
depends on NFS_V4_1 && SCSI_OSD_ULD
- default m
+ default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
@@ -131,6 +140,22 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
If the NFS client is unchanged from the upstream kernel, this
option should be set to the default "kernel.org".
+config NFS_V4_1_MIGRATION
+ bool "NFSv4.1 client support for migration"
+ depends on NFS_V4_1
+ default n
+ help
+ This option makes the NFS client advertise to NFSv4.1 servers that
+ it can support NFSv4 migration.
+
+ The NFSv4.1 pieces of the Linux NFSv4 migration implementation are
+ still experimental. If you are not an NFSv4 developer, say N here.
+
+config NFS_V4_SECURITY_LABEL
+ bool
+ depends on NFS_V4_2 && SECURITY
+ default y
+
config ROOT_NFS
bool "Root file system on NFS"
depends on NFS_FS=y && IP_PNP
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index cce2c057bd2..4782e0840dc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,10 +4,10 @@
obj-$(CONFIG_NFS_FS) += nfs.o
+CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
direct.o pagelist.o read.o symlink.o unlink.o \
- write.o namespace.o mount_clnt.o \
- dns_resolve.o cache_lib.o
+ write.o namespace.o mount_clnt.o nfstrace.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
@@ -20,14 +20,15 @@ nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
obj-$(CONFIG_NFS_V4) += nfsv4.o
+CFLAGS_nfs4trace.o += -I$(src)
nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
- nfs4namespace.o nfs4getroot.o nfs4client.o
+ nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
+ dns_resolve.o nfs4trace.o
+nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
-nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o
-
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
+nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f4..9b431f44fad 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -134,8 +134,8 @@ bl_submit_bio(int rw, struct bio *bio)
if (bio) {
get_parallel(bio->bi_private);
dprintk("%s submitting %s bio %u@%llu\n", __func__,
- rw == READ ? "read" : "write",
- bio->bi_size, (unsigned long long)bio->bi_sector);
+ rw == READ ? "read" : "write", bio->bi_iter.bi_size,
+ (unsigned long long)bio->bi_iter.bi_sector);
submit_bio(rw, bio);
}
return NULL;
@@ -156,7 +156,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
}
if (bio) {
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_iter.bi_sector = isect - be->be_f_offset +
+ be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io;
bio->bi_private = par;
@@ -201,19 +202,15 @@ static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
static void bl_end_io_read(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct bio_vec *bvec;
+ int i;
- do {
- struct page *page = bvec->bv_page;
+ if (!err)
+ bio_for_each_segment_all(bvec, bio, i)
+ SetPageUptodate(bvec->bv_page);
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
- if (uptodate)
- SetPageUptodate(page);
- } while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
- struct nfs_read_data *rdata = par->data;
+ if (err) {
+ struct nfs_pgio_data *rdata = par->data;
struct nfs_pgio_header *header = rdata->header;
if (!header->pnfs_error)
@@ -227,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -245,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *header = rdata->header;
int i, hole;
@@ -383,21 +380,17 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
static void bl_end_io_write_zero(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
- do {
- struct page *page = bvec->bv_page;
+ struct bio_vec *bvec;
+ int i;
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
+ bio_for_each_segment_all(bvec, bio, i) {
/* This is the zeroing page we added */
- end_page_writeback(page);
- page_cache_release(page);
- } while (bvec >= bio->bi_io_vec);
+ end_page_writeback(bvec->bv_page);
+ page_cache_release(bvec->bv_page);
+ }
- if (unlikely(!uptodate)) {
- struct nfs_write_data *data = par->data;
+ if (unlikely(err)) {
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!header->pnfs_error)
@@ -412,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *data = par->data;
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!uptodate) {
@@ -430,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -445,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(wdata->header->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -519,7 +512,7 @@ bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
(offset / SECTOR_SIZE);
- bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset;
bio->bi_bdev = be->be_mdev;
bio->bi_end_io = bl_read_single_end_io;
@@ -680,7 +673,7 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
{
struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
@@ -1089,9 +1082,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
dev->pgbase = 0;
dev->pglen = PAGE_SIZE * max_pages;
dev->mincount = 0;
+ dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
- rc = nfs4_proc_getdeviceinfo(server, dev);
+ rc = nfs4_proc_getdeviceinfo(server, dev, NULL);
dprintk("%s getdevice info returns %d\n", __func__, rc);
if (rc) {
rv = ERR_PTR(rc);
@@ -1195,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
pnfs_generic_pg_init_read(pgio, req);
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, SECTOR_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -1219,7 +1217,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
if (end != NFS_I(inode)->npages) {
rcu_read_lock();
- end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
+ end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
}
@@ -1247,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, PAGE_CACHE_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -1273,6 +1275,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
static struct pnfs_layoutdriver_type blocklayout_type = {
.id = LAYOUT_BLOCK_VOLUME,
.name = "LAYOUT_BLOCK_VOLUME",
+ .owner = THIS_MODULE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
.alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f4891bde885..9838fb02047 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -36,6 +36,7 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include "../nfs4_fs.h"
#include "../pnfs.h"
#include "../netns.h"
@@ -173,7 +174,7 @@ struct bl_msg_hdr {
/* blocklayoutdev.c */
ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
-int nfs4_blkdev_put(struct block_device *bdev);
+void nfs4_blkdev_put(struct block_device *bdev);
struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
struct pnfs_device *dev);
int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index a86c5bdad9e..04303b5c936 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -56,11 +56,11 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
/*
* Release the block device
*/
-int nfs4_blkdev_put(struct block_device *bdev)
+void nfs4_blkdev_put(struct block_device *bdev)
{
dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
MINOR(bdev->bd_dev));
- return blkdev_put(bdev, FMODE_READ);
+ blkdev_put(bdev, FMODE_READ);
}
ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17..8999cfddd86 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
bl_pipe_msg.bl_wq = &nn->bl_wq;
memset(msg, 0, sizeof(*msg));
- msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+ msg->len = sizeof(bl_msg) + bl_msg.totallen;
+ msg->data = kzalloc(msg->len, GFP_NOFS);
if (!msg->data)
goto out;
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
memcpy(msg->data, &bl_msg, sizeof(bl_msg));
dataptr = (uint8_t *) msg->data;
memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
- msg->len = sizeof(bl_msg) + bl_msg.totallen;
add_wait_queue(&nn->bl_wq, &wq);
if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
@@ -88,14 +88,8 @@ out:
*/
static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
{
- int rv;
-
dprintk("%s Releasing\n", __func__);
- rv = nfs4_blkdev_put(bdev->bm_mdev);
- if (rv)
- printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n",
- __func__, rv);
-
+ nfs4_blkdev_put(bdev->bm_mdev);
dev_remove(bdev->net, bdev->bm_mdev->bd_dev);
}
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed..4d016144256 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 862a2f16db6..5f7b053720e 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
struct super_block *pipefs_sb;
int ret = 0;
+ sunrpc_init_cache_detail(cd);
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
ret = nfs_cache_register_sb(pipefs_sb, cd);
rpc_put_sb_net(net);
+ if (ret)
+ sunrpc_destroy_cache_detail(cd);
}
return ret;
}
@@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
nfs_cache_unregister_sb(pipefs_sb, cd);
rpc_put_sb_net(net);
}
-}
-
-void nfs_cache_init(struct cache_detail *cd)
-{
- sunrpc_init_cache_detail(cd);
-}
-
-void nfs_cache_destroy(struct cache_detail *cd)
-{
sunrpc_destroy_cache_detail(cd);
}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 317db95e37f..4116d2c3f52 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
-extern void nfs_cache_init(struct cache_detail *cd);
-extern void nfs_cache_destroy(struct cache_detail *cd);
extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
extern int nfs_cache_register_sb(struct super_block *sb,
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 5088b57b078..073b4cf67ed 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -125,6 +125,9 @@ nfs41_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_should_stop()) {
+ if (try_to_freeze())
+ continue;
+
prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
@@ -161,8 +164,7 @@ nfs41_callback_up(struct svc_serv *serv)
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
- dprintk("--> %s return %ld\n", __func__,
- IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
+ dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp));
return rqstp;
}
@@ -208,7 +210,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
struct svc_rqst *rqstp;
int (*callback_svc)(void *vrqstp);
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
- char svc_name[12];
int ret;
nfs_callback_bc_serv(minorversion, xprt, serv);
@@ -232,10 +233,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
svc_sock_update_bufs(serv);
- sprintf(svc_name, "nfsv4.%u-svc", minorversion);
cb_info->serv = serv;
cb_info->rqst = rqstp;
- cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name);
+ cb_info->task = kthread_run(callback_svc, cb_info->rqst,
+ "nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task);
svc_exit_thread(cb_info->rqst);
@@ -279,6 +280,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n
ret = nfs4_callback_up_net(serv, net);
break;
case 1:
+ case 2:
ret = nfs41_callback_up_net(serv, net);
break;
default:
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index efd54f0a4c4..84326e9fb47 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -32,6 +32,8 @@ enum nfs4_callback_opnum {
OP_CB_WANTS_CANCELLED = 12,
OP_CB_NOTIFY_LOCK = 13,
OP_CB_NOTIFY_DEVICEID = 14,
+/* Callback operations new to NFSv4.2 */
+ OP_CB_OFFLOAD = 15,
OP_CB_ILLEGAL = 10044,
};
@@ -39,6 +41,7 @@ struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
u32 slotid;
+ u32 minorversion;
struct net *net;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 264d1aa935f..41db5258e7a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -15,6 +15,7 @@
#include "internal.h"
#include "pnfs.h"
#include "nfs4session.h"
+#include "nfs4trace.h"
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -93,6 +94,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
default:
res = htonl(NFS4ERR_RESOURCE);
}
+ trace_nfs4_recall_delegation(inode, -ntohl(res));
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -110,7 +112,8 @@ out:
* TODO: keep track of all layouts (and delegations) in a hash table
* hashed by filehandle.
*/
-static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct nfs_server *server;
struct inode *ino;
@@ -118,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
+ continue;
if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
if (!ino)
- continue;
+ break;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
if (NFS_I(ino)->layout != lo) {
spin_unlock(&ino->i_lock);
iput(ino);
- continue;
+ break;
}
pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
@@ -139,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
return NULL;
}
-static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct pnfs_layout_hdr *lo;
spin_lock(&clp->cl_lock);
rcu_read_lock();
- lo = get_layout_by_fh_locked(clp, fh);
+ lo = get_layout_by_fh_locked(clp, fh, stateid);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
@@ -160,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp,
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);
- lo = get_layout_by_fh(clp, &args->cbl_fh);
+ lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
if (!lo)
- return NFS4ERR_NOMATCHING_LAYOUT;
+ goto out;
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
@@ -177,66 +183,22 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
iput(ino);
+out:
return rv;
}
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
- struct nfs_server *server;
- struct pnfs_layout_hdr *lo;
- struct inode *ino;
- u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
- struct pnfs_layout_hdr *tmp;
- LIST_HEAD(recall_list);
- LIST_HEAD(free_me_list);
- struct pnfs_layout_range range = {
- .iomode = IOMODE_ANY,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
-
- spin_lock(&clp->cl_lock);
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- if ((args->cbl_recall_type == RETURN_FSID) &&
- memcmp(&server->fsid, &args->cbl_fsid,
- sizeof(struct nfs_fsid)))
- continue;
-
- list_for_each_entry(lo, &server->layouts, plh_layouts) {
- ino = igrab(lo->plh_inode);
- if (!ino)
- continue;
- spin_lock(&ino->i_lock);
- /* Is this layout in the process of being freed? */
- if (NFS_I(ino)->layout != lo) {
- spin_unlock(&ino->i_lock);
- iput(ino);
- continue;
- }
- pnfs_get_layout_hdr(lo);
- spin_unlock(&ino->i_lock);
- list_add(&lo->plh_bulk_recall, &recall_list);
- }
- }
- rcu_read_unlock();
- spin_unlock(&clp->cl_lock);
+ int stat;
- list_for_each_entry_safe(lo, tmp,
- &recall_list, plh_bulk_recall) {
- ino = lo->plh_inode;
- spin_lock(&ino->i_lock);
- set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
- rv = NFS4ERR_DELAY;
- list_del_init(&lo->plh_bulk_recall);
- spin_unlock(&ino->i_lock);
- pnfs_free_lseg_list(&free_me_list);
- pnfs_put_layout_hdr(lo);
- iput(ino);
- }
- return rv;
+ if (args->cbl_recall_type == RETURN_FSID)
+ stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+ else
+ stat = pnfs_destroy_layouts_byclid(clp, true);
+ if (stat != 0)
+ return NFS4ERR_DELAY;
+ return NFS4ERR_NOMATCHING_LAYOUT;
}
static u32 do_callback_layoutrecall(struct nfs_client *clp,
@@ -346,14 +308,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
{
struct nfs4_slot *slot;
- dprintk("%s enter. slotid %d seqid %d\n",
+ dprintk("%s enter. slotid %u seqid %u\n",
__func__, args->csa_slotid, args->csa_sequenceid);
if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
return htonl(NFS4ERR_BADSLOT);
slot = tbl->slots + args->csa_slotid;
- dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
+ dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
/* Normal */
if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
@@ -363,7 +325,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
- dprintk("%s seqid %d is a replay\n",
+ dprintk("%s seqid %u is a replay\n",
__func__, args->csa_sequenceid);
/* Signal process_op to set this error on next op */
if (args->csa_cachethis == 0)
@@ -451,7 +413,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
int i;
__be32 status = htonl(NFS4ERR_BADSESSION);
- clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid);
+ clp = nfs4_find_client_sessionid(cps->net, args->csa_addr,
+ &args->csa_sessionid, cps->minorversion);
if (clp == NULL)
goto out;
@@ -459,7 +422,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
- if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
spin_unlock(&tbl->slot_tbl_lock);
status = htonl(NFS4ERR_DELAY);
/* Return NFS4ERR_BADSESSION if we're draining the session
@@ -506,6 +469,7 @@ out:
} else
res->csr_status = status;
+ trace_nfs4_cb_sequence(args, res, status);
dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
ntohl(status), ntohl(res->csr_status));
return status;
@@ -545,7 +509,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
&args->craa_type_mask))
pnfs_recall_all_layouts(cps->clp);
if (flags)
- nfs_expire_all_delegation_types(cps->clp, flags);
+ nfs_expire_unused_delegation_types(cps->clp, flags);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
@@ -562,7 +526,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
if (!cps->clp) /* set in cb_sequence */
goto out;
- dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n",
+ dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %u\n",
rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
args->crsa_target_highest_slotid);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 59461c957d9..f4ccfe6521e 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -166,9 +166,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
hdr->minorversion = ntohl(*p++);
- /* Check minor version is zero or one. */
- if (hdr->minorversion <= 1) {
- hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */
+ /* Check for minor version support */
+ if (hdr->minorversion <= NFS4_MAX_MINOR_VERSION) {
+ hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 and v4.2 */
} else {
pr_warn_ratelimited("NFS: %s: NFSv4 server callback with "
"illegal minor version %u!\n",
@@ -763,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
* A single slot, so highest used slotid is either 0 or -1
*/
tbl->highest_used_slotid = NFS4_NO_SLOT;
- nfs4_session_drain_complete(session, tbl);
+ nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
@@ -786,6 +786,26 @@ static void nfs4_cb_free_slot(struct cb_process_state *cps)
}
#endif /* CONFIG_NFS_V4_1 */
+#ifdef CONFIG_NFS_V4_2
+static __be32
+preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op)
+{
+ __be32 status = preprocess_nfs41_op(nop, op_nr, op);
+ if (status != htonl(NFS4ERR_OP_ILLEGAL))
+ return status;
+
+ if (op_nr == OP_CB_OFFLOAD)
+ return htonl(NFS4ERR_NOTSUPP);
+ return htonl(NFS4ERR_OP_ILLEGAL);
+}
+#else /* CONFIG_NFS_V4_2 */
+static __be32
+preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op)
+{
+ return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+}
+#endif /* CONFIG_NFS_V4_2 */
+
static __be32
preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
{
@@ -801,8 +821,7 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
return htonl(NFS_OK);
}
-static __be32 process_op(uint32_t minorversion, int nop,
- struct svc_rqst *rqstp,
+static __be32 process_op(int nop, struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
struct xdr_stream *xdr_out, void *resp,
struct cb_process_state *cps)
@@ -819,10 +838,22 @@ static __be32 process_op(uint32_t minorversion, int nop,
return status;
dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
- __func__, minorversion, nop, op_nr);
+ __func__, cps->minorversion, nop, op_nr);
+
+ switch (cps->minorversion) {
+ case 0:
+ status = preprocess_nfs4_op(op_nr, &op);
+ break;
+ case 1:
+ status = preprocess_nfs41_op(nop, op_nr, &op);
+ break;
+ case 2:
+ status = preprocess_nfs42_op(nop, op_nr, &op);
+ break;
+ default:
+ status = htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+ }
- status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) :
- preprocess_nfs4_op(op_nr, &op);
if (status == htonl(NFS4ERR_OP_ILLEGAL))
op_nr = OP_CB_ILLEGAL;
if (status)
@@ -885,14 +916,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
return rpc_drop_reply;
}
+ cps.minorversion = hdr_arg.minorversion;
hdr_res.taglen = hdr_arg.taglen;
hdr_res.tag = hdr_arg.tag;
if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
return rpc_system_err;
while (status == 0 && nops != hdr_arg.nops) {
- status = process_op(hdr_arg.minorversion, nops, rqstp,
- &xdr_in, argp, &xdr_out, resp, &cps);
+ status = process_op(nops, rqstp, &xdr_in,
+ argp, &xdr_out, resp, &cps);
nops++;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9f3c66438d0..1d09289c8f0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -197,7 +197,6 @@ error_0:
EXPORT_SYMBOL_GPL(nfs_alloc_client);
#if IS_ENABLED(CONFIG_NFS_V4)
-/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
void nfs_cleanup_cb_ident_idr(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
@@ -502,8 +501,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
new->cl_flags = cl_init->init_flags;
- return rpc_ops->init_client(new, timeparms, ip_addr,
- authflavour);
+ return rpc_ops->init_client(new, timeparms, ip_addr);
}
spin_unlock(&nn->nfs_client_lock);
@@ -592,8 +590,12 @@ int nfs_create_rpc_client(struct nfs_client *clp,
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_DISCRTRY;
+ if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT;
if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags))
+ args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@@ -693,13 +695,12 @@ EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
* @clp: nfs_client to initialise
* @timeparms: timeout parameters for underlying RPC transport
* @ip_addr: IP presentation address (not used)
- * @authflavor: authentication flavor for underlying RPC transport
*
* Returns pointer to an NFS client, or an ERR_PTR value.
*/
struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour)
+ const char *ip_addr)
{
int error;
@@ -752,8 +753,6 @@ static int nfs_init_server(struct nfs_server *server,
data->timeo, data->retrans);
if (data->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
- if (server->options & NFS_OPTION_MIGRATION)
- set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
@@ -787,8 +786,10 @@ static int nfs_init_server(struct nfs_server *server,
goto error;
server->port = data->nfs_server.port;
+ server->auth_info = data->auth_info;
- error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms,
+ data->selected_flavor);
if (error < 0)
goto error;
@@ -929,6 +930,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acdirmax = source->acdirmax;
target->caps = source->caps;
target->options = source->options;
+ target->auth_info = source->auth_info;
}
EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
@@ -946,7 +948,7 @@ void nfs_server_insert_lists(struct nfs_server *server)
}
EXPORT_SYMBOL_GPL(nfs_server_insert_lists);
-static void nfs_server_remove_lists(struct nfs_server *server)
+void nfs_server_remove_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs_net *nn;
@@ -963,6 +965,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
synchronize_rcu();
}
+EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
/*
* Allocate and initialise a server record
@@ -1075,7 +1078,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
}
if (!(fattr->valid & NFS_ATTR_FATTR)) {
- error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr);
+ error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL);
if (error < 0) {
dprintk("nfs_create_server: getattr error = %d\n", -error);
goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf3..5d8ccecf5f5 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -20,6 +20,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
#include "internal.h"
+#include "nfs4trace.h"
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
@@ -55,7 +56,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (delegation != NULL && (delegation->type & flags) == flags) {
+ if (delegation != NULL && (delegation->type & flags) == flags &&
+ !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
nfs_mark_delegation_referenced(delegation);
ret = 1;
}
@@ -63,7 +65,7 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
return ret;
}
-static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
+static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
@@ -72,20 +74,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
if (inode->i_flock == NULL)
goto out;
- /* Protect inode->i_flock using the file locks lock */
- lock_flocks();
+ /* Protect inode->i_flock using the i_lock */
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file) != ctx)
continue;
- unlock_flocks();
- status = nfs4_lock_delegation_recall(state, fl);
+ spin_unlock(&inode->i_lock);
+ status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0)
goto out;
- lock_flocks();
+ spin_lock(&inode->i_lock);
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
out:
return status;
}
@@ -94,7 +96,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
+ struct nfs4_state_owner *sp;
struct nfs4_state *state;
+ unsigned int seq;
int err;
again:
@@ -109,9 +113,16 @@ again:
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
+ sp = state->owner;
+ /* Block nfs4_proc_unlck */
+ mutex_lock(&sp->so_delegreturn_mutex);
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid);
- if (err >= 0)
- err = nfs_delegation_claim_locks(ctx, state);
+ if (!err)
+ err = nfs_delegation_claim_locks(ctx, state, stateid);
+ if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+ err = -EAGAIN;
+ mutex_unlock(&sp->so_delegreturn_mutex);
put_nfs_open_context(ctx);
if (err != 0)
return err;
@@ -150,6 +161,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
spin_unlock(&delegation->lock);
put_rpccred(oldcred);
rcu_read_unlock();
+ trace_nfs4_reclaim_delegation(inode, res->delegation_type);
} else {
/* We appear to have raced with a delegation return. */
spin_unlock(&delegation->lock);
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
}
static struct nfs_delegation *
+nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
+{
+ struct nfs_delegation *ret = NULL;
+ struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+ if (delegation == NULL)
+ goto out;
+ spin_lock(&delegation->lock);
+ if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ ret = delegation;
+ spin_unlock(&delegation->lock);
+out:
+ return ret;
+}
+
+static struct nfs_delegation *
+nfs_start_delegation_return(struct nfs_inode *nfsi)
+{
+ struct nfs_delegation *delegation;
+
+ rcu_read_lock();
+ delegation = nfs_start_delegation_return_locked(nfsi);
+ rcu_read_unlock();
+ return delegation;
+}
+
+static void
+nfs_abort_delegation_return(struct nfs_delegation *delegation,
+ struct nfs_client *clp)
+{
+
+ spin_lock(&delegation->lock);
+ clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
+ set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+ spin_unlock(&delegation->lock);
+ set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+}
+
+static struct nfs_delegation *
nfs_detach_delegation_locked(struct nfs_inode *nfsi,
- struct nfs_server *server)
+ struct nfs_delegation *delegation,
+ struct nfs_client *clp)
{
- struct nfs_delegation *delegation =
+ struct nfs_delegation *deleg_cur =
rcu_dereference_protected(nfsi->delegation,
- lockdep_is_held(&server->nfs_client->cl_lock));
+ lockdep_is_held(&clp->cl_lock));
- if (delegation == NULL)
- goto nomatch;
+ if (deleg_cur == NULL || delegation != deleg_cur)
+ return NULL;
spin_lock(&delegation->lock);
+ set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
nfsi->delegation_state = 0;
rcu_assign_pointer(nfsi->delegation, NULL);
spin_unlock(&delegation->lock);
return delegation;
-nomatch:
- return NULL;
}
static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
- struct nfs_server *server)
+ struct nfs_delegation *delegation,
+ struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
- struct nfs_delegation *delegation;
spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, server);
+ delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
spin_unlock(&clp->cl_lock);
return delegation;
}
+static struct nfs_delegation *
+nfs_inode_detach_delegation(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_delegation *delegation;
+
+ delegation = nfs_start_delegation_return(nfsi);
+ if (delegation == NULL)
+ return NULL;
+ return nfs_detach_delegation(nfsi, delegation, server);
+}
+
/**
* nfs_inode_set_delegation - set up a delegation on an inode
* @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi, server);
+ freeme = nfs_detach_delegation_locked(nfsi,
+ old_delegation, clp);
+ if (freeme == NULL)
+ goto out;
}
list_add_rcu(&delegation->super_list, &server->delegations);
nfsi->delegation_state = delegation->type;
@@ -279,6 +346,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
spin_lock(&inode->i_lock);
nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
spin_unlock(&inode->i_lock);
+ trace_nfs4_set_delegation(inode, res->delegation_type);
out:
spin_unlock(&clp->cl_lock);
@@ -292,19 +360,29 @@ out:
/*
* Basic procedure for returning a delegation to the server
*/
-static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
int err;
- /*
- * Guard against new delegated open/lock/unlock calls and against
- * state recovery
- */
- down_write(&nfsi->rwsem);
- err = nfs_delegation_claim_opens(inode, &delegation->stateid);
- up_write(&nfsi->rwsem);
- if (err)
+ if (delegation == NULL)
+ return 0;
+ do {
+ err = nfs_delegation_claim_opens(inode, &delegation->stateid);
+ if (!issync || err != -EAGAIN)
+ break;
+ /*
+ * Guard against state recovery
+ */
+ err = nfs4_wait_clnt_recover(clp);
+ } while (err == 0);
+
+ if (err) {
+ nfs_abort_delegation_return(delegation, clp);
+ goto out;
+ }
+ if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
goto out;
err = nfs_do_return_delegation(inode, delegation, issync);
@@ -312,6 +390,24 @@ out:
return err;
}
+static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
+{
+ bool ret = false;
+
+ if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
+ ret = true;
+ if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
+ struct inode *inode;
+
+ spin_lock(&delegation->lock);
+ inode = delegation->inode;
+ if (inode && list_empty(&NFS_I(inode)->open_files))
+ ret = true;
+ spin_unlock(&delegation->lock);
+ }
+ return ret;
+}
+
/**
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
@@ -334,19 +430,15 @@ restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
- if (!test_and_clear_bit(NFS_DELEGATION_RETURN,
- &delegation->flags))
+ if (!nfs_delegation_need_return(delegation))
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
continue;
- delegation = nfs_detach_delegation(NFS_I(inode),
- server);
+ delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
- if (delegation != NULL)
- err = __nfs_inode_return_delegation(inode,
- delegation, 0);
+ err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
if (!err)
goto restart;
@@ -367,15 +459,11 @@ restart:
*/
void nfs_inode_return_delegation_noreclaim(struct inode *inode)
{
- struct nfs_server *server = NFS_SERVER(inode);
- struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
- if (rcu_access_pointer(nfsi->delegation) != NULL) {
- delegation = nfs_detach_delegation(nfsi, server);
- if (delegation != NULL)
- nfs_do_return_delegation(inode, delegation, 0);
- }
+ delegation = nfs_inode_detach_delegation(inode);
+ if (delegation != NULL)
+ nfs_do_return_delegation(inode, delegation, 0);
}
/**
@@ -390,21 +478,24 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
*/
int nfs4_inode_return_delegation(struct inode *inode)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
int err = 0;
nfs_wb_all(inode);
- if (rcu_access_pointer(nfsi->delegation) != NULL) {
- delegation = nfs_detach_delegation(nfsi, server);
- if (delegation != NULL) {
- err = __nfs_inode_return_delegation(inode, delegation, 1);
- }
- }
+ delegation = nfs_start_delegation_return(nfsi);
+ if (delegation != NULL)
+ err = nfs_end_delegation_return(inode, delegation, 1);
return err;
}
+static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
+{
+ set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+}
+
static void nfs_mark_return_delegation(struct nfs_server *server,
struct nfs_delegation *delegation)
{
@@ -412,6 +503,45 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
+static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
+{
+ struct nfs_delegation *delegation;
+ bool ret = false;
+
+ list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ nfs_mark_return_delegation(server, delegation);
+ ret = true;
+ }
+ return ret;
+}
+
+static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
+{
+ struct nfs_server *server;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
+ nfs_server_mark_return_all_delegations(server);
+ rcu_read_unlock();
+}
+
+static void nfs_delegation_run_state_manager(struct nfs_client *clp)
+{
+ if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
+ nfs4_schedule_state_manager(clp);
+}
+
+/**
+ * nfs_expire_all_delegations
+ * @clp: client to process
+ *
+ */
+void nfs_expire_all_delegations(struct nfs_client *clp)
+{
+ nfs_client_mark_return_all_delegations(clp);
+ nfs_delegation_run_state_manager(clp);
+}
+
/**
* nfs_super_return_all_delegations - return delegations for one superblock
* @sb: sb to process
@@ -420,24 +550,22 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
void nfs_server_return_all_delegations(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
- struct nfs_delegation *delegation;
+ bool need_wait;
if (clp == NULL)
return;
rcu_read_lock();
- list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
- spin_lock(&delegation->lock);
- set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
- spin_unlock(&delegation->lock);
- }
+ need_wait = nfs_server_mark_return_all_delegations(server);
rcu_read_unlock();
- if (nfs_client_return_marked_delegations(clp) != 0)
+ if (need_wait) {
nfs4_schedule_state_manager(clp);
+ nfs4_wait_clnt_recover(clp);
+ }
}
-static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
+static void nfs_mark_return_unused_delegation_types(struct nfs_server *server,
fmode_t flags)
{
struct nfs_delegation *delegation;
@@ -446,32 +574,26 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(server, delegation);
+ nfs_mark_return_if_closed_delegation(server, delegation);
}
}
-static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp,
+static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp,
fmode_t flags)
{
struct nfs_server *server;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
- nfs_mark_return_all_delegation_types(server, flags);
+ nfs_mark_return_unused_delegation_types(server, flags);
rcu_read_unlock();
}
-static void nfs_delegation_run_state_manager(struct nfs_client *clp)
-{
- if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
- nfs4_schedule_state_manager(clp);
-}
-
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
- delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
+ delegation = nfs_inode_detach_delegation(inode);
if (delegation) {
nfs_inode_find_state_and_recover(inode, &delegation->stateid);
nfs_free_delegation(delegation);
@@ -480,27 +602,17 @@ void nfs_remove_bad_delegation(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
/**
- * nfs_expire_all_delegation_types
+ * nfs_expire_unused_delegation_types
* @clp: client to process
* @flags: delegation types to expire
*
*/
-void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags)
+void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags)
{
- nfs_client_mark_return_all_delegation_types(clp, flags);
+ nfs_client_mark_return_unused_delegation_types(clp, flags);
nfs_delegation_run_state_manager(clp);
}
-/**
- * nfs_expire_all_delegations
- * @clp: client to process
- *
- */
-void nfs_expire_all_delegations(struct nfs_client *clp)
-{
- nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE);
-}
-
static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
{
struct nfs_delegation *delegation;
@@ -508,7 +620,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(server, delegation);
+ nfs_mark_return_if_closed_delegation(server, delegation);
}
}
@@ -547,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL)
+ goto out_enoent;
- if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
- rcu_read_unlock();
- return -ENOENT;
- }
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
+ goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
+out_enoent:
+ rcu_read_unlock();
+ return -ENOENT;
}
static struct inode *
@@ -649,7 +764,7 @@ restart:
if (inode == NULL)
continue;
delegation = nfs_detach_delegation(NFS_I(inode),
- server);
+ delegation, server);
rcu_read_unlock();
if (delegation != NULL)
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d..9a79c7a99d6 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -28,7 +28,9 @@ struct nfs_delegation {
enum {
NFS_DELEGATION_NEED_RECLAIM = 0,
NFS_DELEGATION_RETURN,
+ NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
+ NFS_DELEGATION_RETURNING,
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -40,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
void nfs_server_return_all_delegations(struct nfs_server *);
void nfs_expire_all_delegations(struct nfs_client *clp);
-void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
+void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
@@ -52,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
/* NFSv4 delegation-related procedures */
int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
-int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1b2d7eb9379..4a3d4ef7612 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,6 +33,7 @@
#include <linux/pagevec.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/swap.h>
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/xattr.h>
@@ -42,11 +43,13 @@
#include "internal.h"
#include "fscache.h"
+#include "nfstrace.h"
+
/* #define NFS_DEBUG_VERBOSE 1 */
static int nfs_opendir(struct inode *, struct file *);
static int nfs_closedir(struct inode *, struct file *);
-static int nfs_readdir(struct file *, void *, filldir_t);
+static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct page*);
@@ -54,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
.read = generic_read_dir,
- .readdir = nfs_readdir,
+ .iterate = nfs_readdir,
.open = nfs_opendir,
.release = nfs_closedir,
.fsync = nfs_fsync_dir,
@@ -66,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = {
static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
{
+ struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_open_dir_context *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx != NULL) {
ctx->duped = 0;
- ctx->attr_gencount = NFS_I(dir)->attr_gencount;
+ ctx->attr_gencount = nfsi->attr_gencount;
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
ctx->cred = get_rpccred(cred);
+ spin_lock(&dir->i_lock);
+ list_add(&ctx->list, &nfsi->open_files);
+ spin_unlock(&dir->i_lock);
return ctx;
}
return ERR_PTR(-ENOMEM);
}
-static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
+static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
{
+ spin_lock(&dir->i_lock);
+ list_del(&ctx->list);
+ spin_unlock(&dir->i_lock);
put_rpccred(ctx->cred);
kfree(ctx);
}
@@ -95,9 +105,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
struct nfs_open_dir_context *ctx;
struct rpc_cred *cred;
- dfprintk(FILE, "NFS: open dir(%s/%s)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name);
+ dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
@@ -125,7 +133,7 @@ out:
static int
nfs_closedir(struct inode *inode, struct file *filp)
{
- put_nfs_open_dir_context(filp->private_data);
+ put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
return 0;
}
@@ -147,6 +155,7 @@ typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
typedef struct {
struct file *file;
struct page *page;
+ struct dir_context *ctx;
unsigned long page_index;
u64 *dir_cookie;
u64 last_cookie;
@@ -252,7 +261,7 @@ out:
static
int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
{
- loff_t diff = desc->file->f_pos - desc->current_index;
+ loff_t diff = desc->ctx->pos - desc->current_index;
unsigned int index;
if (diff < 0)
@@ -272,6 +281,15 @@ out_eof:
return -EBADCOOKIE;
}
+static bool
+nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
+{
+ if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
+ return false;
+ smp_rmb();
+ return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
+}
+
static
int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
{
@@ -281,25 +299,23 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
for (i = 0; i < array->size; i++) {
if (array->array[i].cookie == *desc->dir_cookie) {
- struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode);
+ struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
struct nfs_open_dir_context *ctx = desc->file->private_data;
new_pos = desc->current_index + i;
- if (ctx->attr_gencount != nfsi->attr_gencount
- || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) {
+ if (ctx->attr_gencount != nfsi->attr_gencount ||
+ !nfs_readdir_inode_mapping_valid(nfsi)) {
ctx->duped = 0;
ctx->attr_gencount = nfsi->attr_gencount;
- } else if (new_pos < desc->file->f_pos) {
+ } else if (new_pos < desc->ctx->pos) {
if (ctx->duped > 0
&& ctx->dup_cookie == *desc->dir_cookie) {
if (printk_ratelimit()) {
- pr_notice("NFS: directory %s/%s contains a readdir loop."
+ pr_notice("NFS: directory %pD2 contains a readdir loop."
"Please contact your server vendor. "
- "The file: %s has duplicate cookie %llu\n",
- desc->file->f_dentry->d_parent->d_name.name,
- desc->file->f_dentry->d_name.name,
- array->array[i].string.name,
- *desc->dir_cookie);
+ "The file: %.*s has duplicate cookie %llu\n",
+ desc->file, array->array[i].string.len,
+ array->array[i].string.name, *desc->dir_cookie);
}
status = -ELOOP;
goto out;
@@ -307,7 +323,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
ctx->dup_cookie = *desc->dir_cookie;
ctx->duped = -1;
}
- desc->file->f_pos = new_pos;
+ desc->ctx->pos = new_pos;
desc->cache_entry_index = i;
return 0;
}
@@ -405,13 +421,13 @@ different:
}
static
-bool nfs_use_readdirplus(struct inode *dir, struct file *filp)
+bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
return false;
if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
return true;
- if (filp->f_pos == 0)
+ if (ctx->pos == 0)
return true;
return false;
}
@@ -427,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir)
set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
}
+/*
+ * This function is mainly for use by nfs_getattr().
+ *
+ * If this is an 'ls -l', we want to force use of readdirplus.
+ * Do this by checking if there is an active file descriptor
+ * and calling nfs_advise_use_readdirplus, then forcing a
+ * cache flush.
+ */
+void nfs_force_use_readdirplus(struct inode *dir)
+{
+ if (!list_empty(&NFS_I(dir)->open_files)) {
+ nfs_advise_use_readdirplus(dir);
+ nfs_zap_mapping(dir, dir->i_mapping);
+ }
+}
+
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
@@ -435,6 +467,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
struct dentry *alias;
struct inode *dir = parent->d_inode;
struct inode *inode;
+ int status;
if (filename.name[0] == '.') {
if (filename.len == 1)
@@ -447,7 +480,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
dentry = d_lookup(parent, &filename);
if (dentry != NULL) {
if (nfs_same_file(dentry, entry)) {
- nfs_refresh_inode(dentry->d_inode, entry->fattr);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
+ if (!status)
+ nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label);
goto out;
} else {
if (d_invalidate(dentry) != 0)
@@ -460,7 +496,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
if (dentry == NULL)
return;
- inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+ inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label);
if (IS_ERR(inode))
goto out;
@@ -585,10 +621,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
if (entry.fh == NULL || entry.fattr == NULL)
goto out;
+ entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+ if (IS_ERR(entry.label)) {
+ status = PTR_ERR(entry.label);
+ goto out;
+ }
+
array = nfs_readdir_get_array(page);
if (IS_ERR(array)) {
status = PTR_ERR(array);
- goto out;
+ goto out_label_free;
}
memset(array, 0, sizeof(struct nfs_cache_array));
array->eof_index = -1;
@@ -614,6 +656,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
nfs_readdir_free_large_page(pages_ptr, pages, array_size);
out_release_array:
nfs_readdir_release_array(page);
+out_label_free:
+ nfs4_label_free(entry.label);
out:
nfs_free_fattr(entry.fattr);
nfs_free_fhandle(entry.fh);
@@ -629,7 +673,7 @@ out:
static
int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
{
- struct inode *inode = desc->file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(desc->file);
int ret;
ret = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -660,7 +704,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
static
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
{
- return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+ return read_cache_page(file_inode(desc->file)->i_mapping,
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
}
@@ -702,8 +746,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
* Once we've found the start of the dirent within a page: fill 'er up...
*/
static
-int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
- filldir_t filldir)
+int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
{
struct file *file = desc->file;
int i = 0;
@@ -721,13 +764,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
struct nfs_cache_array_entry *ent;
ent = &array->array[i];
- if (filldir(dirent, ent->string.name, ent->string.len,
- file->f_pos, nfs_compat_user_ino64(ent->ino),
- ent->d_type) < 0) {
+ if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
+ nfs_compat_user_ino64(ent->ino), ent->d_type)) {
desc->eof = 1;
break;
}
- file->f_pos++;
+ desc->ctx->pos++;
if (i < (array->size-1))
*desc->dir_cookie = array->array[i+1].cookie;
else
@@ -759,12 +801,11 @@ out:
* directory in the page cache by the time we get here.
*/
static inline
-int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
- filldir_t filldir)
+int uncached_readdir(nfs_readdir_descriptor_t *desc)
{
struct page *page = NULL;
int status;
- struct inode *inode = desc->file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(desc->file);
struct nfs_open_dir_context *ctx = desc->file->private_data;
dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
@@ -785,7 +826,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
if (status < 0)
goto out_release;
- status = nfs_do_filldir(desc, dirent, filldir);
+ status = nfs_do_filldir(desc);
out:
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
@@ -796,39 +837,51 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
goto out;
}
+static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
+{
+ struct nfs_inode *nfsi = NFS_I(dir);
+
+ if (nfs_attribute_cache_expired(dir))
+ return true;
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return true;
+ return false;
+}
+
/* The file offset position represents the dirent entry number. A
last cookie cache takes care of the common case of reading the
whole directory.
*/
-static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int nfs_readdir(struct file *file, struct dir_context *ctx)
{
- struct dentry *dentry = filp->f_path.dentry;
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
nfs_readdir_descriptor_t my_desc,
*desc = &my_desc;
- struct nfs_open_dir_context *dir_ctx = filp->private_data;
- int res;
+ struct nfs_open_dir_context *dir_ctx = file->private_data;
+ int res = 0;
- dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (long long)filp->f_pos);
+ dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
+ file, (long long)ctx->pos);
nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
/*
- * filp->f_pos points to the dirent entry number.
+ * ctx->pos points to the dirent entry number.
* *desc->dir_cookie has the cookie for the next entry. We have
* to either find the entry with the appropriate number or
* revalidate the cookie.
*/
memset(desc, 0, sizeof(*desc));
- desc->file = filp;
+ desc->file = file;
+ desc->ctx = ctx;
desc->dir_cookie = &dir_ctx->dir_cookie;
desc->decode = NFS_PROTO(inode)->decode_dirent;
- desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0;
+ desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
nfs_block_sillyrename(dentry);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
+ if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
+ res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
goto out;
@@ -840,7 +893,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
/* This means either end of directory */
if (*desc->dir_cookie && desc->eof == 0) {
/* Or that the server has 'lost' a cookie */
- res = uncached_readdir(desc, dirent, filldir);
+ res = uncached_readdir(desc);
if (res == 0)
continue;
}
@@ -857,7 +910,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (res < 0)
break;
- res = nfs_do_filldir(desc, dirent, filldir);
+ res = nfs_do_filldir(desc);
if (res < 0)
break;
} while (!desc->eof);
@@ -865,22 +918,17 @@ out:
nfs_unblock_sillyrename(dentry);
if (res > 0)
res = 0;
- dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- res);
+ dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
return res;
}
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct nfs_open_dir_context *dir_ctx = filp->private_data;
- dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name,
- offset, whence);
+ dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
+ filp, offset, whence);
mutex_lock(&inode->i_mutex);
switch (whence) {
@@ -910,15 +958,12 @@ out:
static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
- dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- datasync);
+ dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
mutex_lock(&inode->i_mutex);
- nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
+ nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
mutex_unlock(&inode->i_mutex);
return 0;
}
@@ -1040,6 +1085,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct dentry *parent;
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
+ struct nfs4_label *label = NULL;
int error;
if (flags & LOOKUP_RCU)
@@ -1057,9 +1103,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
}
if (is_bad_inode(inode)) {
- dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
- __func__, dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+ __func__, dentry);
goto out_bad;
}
@@ -1082,7 +1127,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (fhandle == NULL || fattr == NULL)
goto out_error;
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+ if (IS_ERR(label))
+ goto out_error;
+
+ trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
if (error)
goto out_bad;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1090,8 +1141,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
goto out_bad;
+ nfs_setsecurity(inode, fattr, label);
+
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
+ nfs4_label_free(label);
+
out_set_verifier:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
@@ -1099,50 +1154,90 @@ out_set_verifier:
nfs_advise_use_readdirplus(dir);
out_valid_noent:
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
- __func__, dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+ __func__, dentry);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
+ nfs4_label_free(label);
nfs_mark_for_revalidate(dir);
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
- /* If we have submounts, don't unhash ! */
- if (have_submounts(dentry))
- goto out_valid;
- if (dentry->d_flags & DCACHE_DISCONNECTED)
+ /*
+ * We can't d_drop the root of a disconnected tree:
+ * its d_hash is on the s_anon list and d_drop() would hide
+ * it from shrink_dcache_for_unmount(), leading to busy
+ * inodes on unmount and further oopses.
+ */
+ if (IS_ROOT(dentry))
goto out_valid;
- shrink_dcache_parent(dentry);
}
- d_drop(dentry);
+ /* If we have submounts, don't unhash ! */
+ if (check_submounts_and_drop(dentry) != 0)
+ goto out_valid;
+
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
- __func__, dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+ __func__, dentry);
return 0;
out_error:
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
+ nfs4_label_free(label);
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
- __func__, dentry->d_parent->d_name.name,
- dentry->d_name.name, error);
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+ __func__, dentry, error);
return error;
}
/*
+ * A weaker form of d_revalidate for revalidating just the dentry->d_inode
+ * when we don't really care about the dentry name. This is called when a
+ * pathwalk ends on a dentry that was not found via a normal lookup in the
+ * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
+ *
+ * In this situation, we just want to verify that the inode itself is OK
+ * since the dentry might have changed on the server.
+ */
+static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ int error;
+ struct inode *inode = dentry->d_inode;
+
+ /*
+ * I believe we can only get a negative dentry here in the case of a
+ * procfs-style symlink. Just assume it's correct for now, but we may
+ * eventually need to do something more here.
+ */
+ if (!inode) {
+ dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
+ __func__, dentry);
+ return 1;
+ }
+
+ if (is_bad_inode(inode)) {
+ dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
+ __func__, dentry);
+ return 0;
+ }
+
+ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
+ __func__, inode->i_ino, error ? "invalid" : "valid");
+ return !error;
+}
+
+/*
* This is called from dput() when d_count is going to 0.
*/
static int nfs_dentry_delete(const struct dentry *dentry)
{
- dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- dentry->d_flags);
+ dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
+ dentry, dentry->d_flags);
/* Unhash any dentry with a stale inode */
if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
@@ -1202,6 +1297,7 @@ static void nfs_d_release(struct dentry *dentry)
const struct dentry_operations nfs_dentry_operations = {
.d_revalidate = nfs_lookup_revalidate,
+ .d_weak_revalidate = nfs_weak_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
.d_automount = nfs_d_automount,
@@ -1216,10 +1312,10 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
struct inode *inode = NULL;
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
+ struct nfs4_label *label = NULL;
int error;
- dfprintk(VFS, "NFS: lookup(%s/%s)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
res = ERR_PTR(-ENAMETOOLONG);
@@ -1242,17 +1338,22 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
if (fhandle == NULL || fattr == NULL)
goto out;
+ label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT);
+ if (IS_ERR(label))
+ goto out;
+
parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
+ trace_nfs_lookup_enter(dir, dentry, flags);
nfs_block_sillyrename(parent);
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
res = ERR_PTR(error);
goto out_unblock_sillyrename;
}
- inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
res = ERR_CAST(inode);
if (IS_ERR(res))
goto out_unblock_sillyrename;
@@ -1270,6 +1371,8 @@ no_entry:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unblock_sillyrename:
nfs_unblock_sillyrename(parent);
+ trace_nfs_lookup_exit(dir, dentry, flags, error);
+ nfs4_label_free(label);
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
@@ -1306,7 +1409,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i
static int do_open(struct inode *inode, struct file *filp)
{
- nfs_fscache_set_inode_cookie(inode, filp);
+ nfs_fscache_open_file(inode, filp);
return 0;
}
@@ -1317,17 +1420,8 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
{
int err;
- if (ctx->dentry != dentry) {
- dput(ctx->dentry);
- ctx->dentry = dget(dentry);
- }
-
- /* If the open_intent is for execute, we have an extra check to make */
- if (ctx->mode & FMODE_EXEC) {
- err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
- if (err < 0)
- goto out;
- }
+ if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ *opened |= FILE_CREATED;
err = finish_open(file, dentry, do_open, opened);
if (err)
@@ -1335,7 +1429,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
nfs_file_set_open_context(file, ctx);
out:
- put_nfs_open_context(ctx);
return err;
}
@@ -1347,13 +1440,18 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
+ unsigned int lookup_flags = 0;
int err;
/* Expect a negative dentry */
BUG_ON(dentry->d_inode);
- dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
+ dir->i_sb->s_id, dir->i_ino, dentry);
+
+ err = nfs_check_flags(open_flags);
+ if (err)
+ return err;
/* NFS only supports OPEN on regular files */
if ((open_flags & O_DIRECTORY)) {
@@ -1365,6 +1463,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
*/
return -ENOENT;
}
+ lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
goto no_open;
}
@@ -1385,15 +1484,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (IS_ERR(ctx))
goto out;
+ trace_nfs_atomic_open_enter(dir, ctx, open_flags);
nfs_block_sillyrename(dentry->d_parent);
- inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
- d_drop(dentry);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+ nfs_unblock_sillyrename(dentry->d_parent);
if (IS_ERR(inode)) {
- nfs_unblock_sillyrename(dentry->d_parent);
- put_nfs_open_context(ctx);
err = PTR_ERR(inode);
+ trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
+ put_nfs_open_context(ctx);
switch (err) {
case -ENOENT:
+ d_drop(dentry);
d_add(dentry, NULL);
break;
case -EISDIR:
@@ -1409,21 +1510,15 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
goto out;
}
- res = d_add_unique(dentry, inode);
- if (res != NULL)
- dentry = res;
-
- nfs_unblock_sillyrename(dentry->d_parent);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-
- err = nfs_finish_open(ctx, dentry, file, open_flags, opened);
- dput(res);
+ err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened);
+ trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
+ put_nfs_open_context(ctx);
out:
return err;
no_open:
- res = nfs_lookup(dir, dentry, 0);
+ res = nfs_lookup(dir, dentry, lookup_flags);
err = PTR_ERR(res);
if (IS_ERR(res))
goto out;
@@ -1446,6 +1541,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open;
if (d_mountpoint(dentry))
goto no_open;
+ if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ goto no_open;
inode = dentry->d_inode;
parent = dget_parent(dentry);
@@ -1486,7 +1583,8 @@ no_open:
* Code common to create, mkdir, and mknod.
*/
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr,
+ struct nfs4_label *label)
{
struct dentry *parent = dget_parent(dentry);
struct inode *dir = parent->d_inode;
@@ -1499,18 +1597,18 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
if (dentry->d_inode)
goto out;
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
if (error)
goto out_error;
}
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
if (!(fattr->valid & NFS_ATTR_FATTR)) {
struct nfs_server *server = NFS_SB(dentry->d_sb);
- error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
+ error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL);
if (error < 0)
goto out_error;
}
- inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
error = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_error;
@@ -1538,13 +1636,15 @@ int nfs_create(struct inode *dir, struct dentry *dentry,
int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
int error;
- dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
+ dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
+ trace_nfs_create_enter(dir, dentry, open_flags);
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+ trace_nfs_create_exit(dir, dentry, open_flags, error);
if (error != 0)
goto out_err;
return 0;
@@ -1563,8 +1663,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
struct iattr attr;
int status;
- dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
+ dir->i_sb->s_id, dir->i_ino, dentry);
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1572,7 +1672,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
+ trace_nfs_mknod_enter(dir, dentry);
status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
+ trace_nfs_mknod_exit(dir, dentry, status);
if (status != 0)
goto out_err;
return 0;
@@ -1590,13 +1692,15 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct iattr attr;
int error;
- dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
+ dir->i_sb->s_id, dir->i_ino, dentry);
attr.ia_valid = ATTR_MODE;
attr.ia_mode = mode | S_IFDIR;
+ trace_nfs_mkdir_enter(dir, dentry);
error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+ trace_nfs_mkdir_exit(dir, dentry, error);
if (error != 0)
goto out_err;
return 0;
@@ -1616,15 +1720,24 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
- dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
+ dir->i_sb->s_id, dir->i_ino, dentry);
- error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
- /* Ensure the VFS deletes this inode */
- if (error == 0 && dentry->d_inode != NULL)
- clear_nlink(dentry->d_inode);
- else if (error == -ENOENT)
- nfs_dentry_handle_enoent(dentry);
+ trace_nfs_rmdir_enter(dir, dentry);
+ if (dentry->d_inode) {
+ nfs_wait_on_sillyrename(dentry);
+ error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+ /* Ensure the VFS deletes this inode */
+ switch (error) {
+ case 0:
+ clear_nlink(dentry->d_inode);
+ break;
+ case -ENOENT:
+ nfs_dentry_handle_enoent(dentry);
+ }
+ } else
+ error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+ trace_nfs_rmdir_exit(dir, dentry, error);
return error;
}
@@ -1643,8 +1756,7 @@ static int nfs_safe_remove(struct dentry *dentry)
struct inode *inode = dentry->d_inode;
int error = -EBUSY;
- dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
/* If the dentry was sillyrenamed, we simply call d_delete() */
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1652,6 +1764,7 @@ static int nfs_safe_remove(struct dentry *dentry)
goto out;
}
+ trace_nfs_remove_enter(dir, dentry);
if (inode != NULL) {
NFS_PROTO(inode)->return_delegation(inode);
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
@@ -1661,6 +1774,7 @@ static int nfs_safe_remove(struct dentry *dentry)
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
if (error == -ENOENT)
nfs_dentry_handle_enoent(dentry);
+ trace_nfs_remove_exit(dir, dentry, error);
out:
return error;
}
@@ -1675,16 +1789,17 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
int error;
int need_rehash = 0;
- dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name);
+ dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
+ dir->i_ino, dentry);
+ trace_nfs_unlink_enter(dir, dentry);
spin_lock(&dentry->d_lock);
- if (dentry->d_count > 1) {
+ if (d_count(dentry) > 1) {
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(dentry->d_inode, 0);
error = nfs_sillyrename(dir, dentry);
- return error;
+ goto out;
}
if (!d_unhashed(dentry)) {
__d_drop(dentry);
@@ -1696,6 +1811,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
} else if (need_rehash)
d_rehash(dentry);
+out:
+ trace_nfs_unlink_exit(dir, dentry, error);
return error;
}
EXPORT_SYMBOL_GPL(nfs_unlink);
@@ -1717,15 +1834,14 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
*/
int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
- struct pagevec lru_pvec;
struct page *page;
char *kaddr;
struct iattr attr;
unsigned int pathlen = strlen(symname);
int error;
- dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
- dir->i_ino, dentry->d_name.name, symname);
+ dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
+ dir->i_ino, dentry, symname);
if (pathlen > PAGE_SIZE)
return -ENAMETOOLONG;
@@ -1743,11 +1859,13 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
kunmap_atomic(kaddr);
+ trace_nfs_symlink_enter(dir, dentry);
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
+ trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
- dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+ dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
- dentry->d_name.name, symname, error);
+ dentry, symname, error);
d_drop(dentry);
__free_page(page);
return error;
@@ -1757,13 +1875,15 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
* No big deal if we can't add this page to the page cache here.
* READLINK will get the missing page from the server if needed.
*/
- pagevec_init(&lru_pvec, 0);
- if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+ if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0,
GFP_KERNEL)) {
- pagevec_add(&lru_pvec, page);
- pagevec_lru_add_file(&lru_pvec);
SetPageUptodate(page);
unlock_page(page);
+ /*
+ * add_to_page_cache_lru() grabs an extra page refcount.
+ * Drop it here to avoid leaking this page later.
+ */
+ page_cache_release(page);
} else
__free_page(page);
@@ -1777,10 +1897,10 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
struct inode *inode = old_dentry->d_inode;
int error;
- dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
- old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
+ old_dentry, dentry);
+ trace_nfs_link_enter(inode, dir, dentry);
NFS_PROTO(inode)->return_delegation(inode);
d_drop(dentry);
@@ -1789,6 +1909,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
ihold(inode);
d_add(dentry, inode);
}
+ trace_nfs_link_exit(inode, dir, dentry, error);
return error;
}
EXPORT_SYMBOL_GPL(nfs_link);
@@ -1823,13 +1944,14 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct dentry *dentry = NULL, *rehash = NULL;
+ struct rpc_task *task;
int error = -EBUSY;
- dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
- old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
- new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
- new_dentry->d_count);
+ dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
+ old_dentry, new_dentry,
+ d_count(new_dentry));
+ trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
/*
* For non-directories, check whether the target is busy and if so,
* make a copy of the dentry and then do a silly-rename. If the
@@ -1846,7 +1968,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
rehash = new_dentry;
}
- if (new_dentry->d_count > 2) {
+ if (d_count(new_dentry) > 2) {
int err;
/* copy the target dentry's name */
@@ -1870,12 +1992,22 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
- new_dir, &new_dentry->d_name);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ goto out;
+ }
+
+ error = rpc_wait_for_completion_task(task);
+ if (error == 0)
+ error = task->tk_status;
+ rpc_put_task(task);
nfs_mark_for_revalidate(old_inode);
out:
if (rehash)
d_rehash(rehash);
+ trace_nfs_rename_exit(old_dir, old_dentry,
+ new_dir, new_dentry, error);
if (!error) {
if (new_inode != NULL)
nfs_drop_nlink(new_inode);
@@ -1900,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
kfree(entry);
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
- smp_mb__after_atomic_dec();
+ smp_mb__after_atomic();
}
static void nfs_access_free_list(struct list_head *head)
@@ -1916,17 +2048,18 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-int nfs_access_cache_shrinker(struct shrinker *shrink,
- struct shrink_control *sc)
+unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
LIST_HEAD(head);
struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache;
int nr_to_scan = sc->nr_to_scan;
gfp_t gfp_mask = sc->gfp_mask;
+ long freed = 0;
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return (nr_to_scan == 0) ? 0 : -1;
+ return SHRINK_STOP;
spin_lock(&nfs_access_lru_lock);
list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
@@ -1942,21 +2075,28 @@ int nfs_access_cache_shrinker(struct shrinker *shrink,
struct nfs_access_entry, lru);
list_move(&cache->lru, &head);
rb_erase(&cache->rb_node, &nfsi->access_cache);
+ freed++;
if (!list_empty(&nfsi->access_cache_entry_lru))
list_move_tail(&nfsi->access_cache_inode_lru,
&nfs_access_lru_list);
else {
remove_lru_entry:
list_del_init(&nfsi->access_cache_inode_lru);
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
}
spin_unlock(&inode->i_lock);
}
spin_unlock(&nfs_access_lru_lock);
nfs_access_free_list(&head);
- return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+ return freed;
+}
+
+unsigned long
+nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
}
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
@@ -2092,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
nfs_access_add_rbtree(inode, cache);
/* Update accounting */
- smp_mb__before_atomic_inc();
+ smp_mb__before_atomic();
atomic_long_inc(&nfs_access_nr_entries);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
/* Add inode to global LRU list */
if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
@@ -2125,9 +2265,11 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
struct nfs_access_entry cache;
int status;
+ trace_nfs_access_enter(inode);
+
status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0)
- goto out;
+ goto out_cached;
/* Be clever: ask server to check for all possible rights */
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
@@ -2140,13 +2282,15 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
if (!S_ISDIR(inode->i_mode))
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
}
- return status;
+ goto out;
}
nfs_access_add_cache(inode, &cache);
+out_cached:
+ if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
+ status = -EACCES;
out:
- if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
- return 0;
- return -EACCES;
+ trace_nfs_access_exit(inode, status);
+ return status;
}
static int nfs_open_permission_mask(int openflags)
@@ -2192,11 +2336,6 @@ int nfs_permission(struct inode *inode, int mask)
case S_IFLNK:
goto out;
case S_IFREG:
- /* NFSv4 has atomic_open... */
- if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
- && (mask & MAY_OPEN)
- && !(mask & MAY_EXEC))
- goto out;
break;
case S_IFDIR:
/*
@@ -2221,7 +2360,7 @@ out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES;
- dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+ dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0bd7a55a5f0..f11b9eed0de 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+ struct nfs_client *ds_clp,
+ int ds_idx)
+{
+ struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+ if (ds_clp) {
+ /* pNFS is in use, use the DS verf */
+ if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+ verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+ else
+ WARN_ON_ONCE(1);
+ }
+#endif
+ return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header whose verifier is recorded as the "seen" verf
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ WARN_ON_ONCE(verfp->committed >= 0);
+ memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+ WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * Set the server's "seen" verf from @hdr if it is not yet initialized and
+ * return 0. Otherwise return the result of comparing @hdr->verf with the
+ * "seen" verf of the server used by @hdr (DS or MDS); non-zero means mismatch.
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ if (verfp->committed < 0) {
+ nfs_direct_set_hdr_verf(dreq, hdr);
+ return 0;
+ }
+ return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+ struct nfs_commit_data *data)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+ data->ds_commit_index);
+ WARN_ON_ONCE(verfp->committed < 0);
+ return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @rw: direction (read or write)
@@ -121,22 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
* shunt off direct read and write requests before the VFS gets them,
* so this method is only ever called for swap.
*/
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
#ifndef CONFIG_NFS_SWAP
- dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
- iocb->ki_filp->f_path.dentry->d_name.name,
- (long long) pos, nr_segs);
+ dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
+ iocb->ki_filp, (long long) pos, iter->nr_segs);
return -EINVAL;
#else
- VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
if (rw == READ || rw == KERNEL_READ)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_read(iocb, iter, pos,
rw == READ ? true : false);
- return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_write(iocb, iter, pos,
rw == WRITE ? true : false);
#endif /* CONFIG_NFS_SWAP */
}
@@ -170,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
@@ -224,14 +314,31 @@ out:
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
-static void nfs_direct_complete(struct nfs_direct_req *dreq)
+static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
+ struct inode *inode = dreq->inode;
+
+ if (dreq->iocb && write) {
+ loff_t pos = dreq->iocb->ki_pos + dreq->count;
+
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < pos)
+ i_size_write(inode, pos);
+ spin_unlock(&inode->i_lock);
+ }
+
+ if (write)
+ nfs_zap_mapping(inode, inode->i_mapping);
+
+ inode_dio_done(inode);
+
if (dreq->iocb) {
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
}
+
complete_all(&dreq->completion);
nfs_direct_req_release(dreq);
@@ -239,9 +346,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
static void nfs_direct_readpage_release(struct nfs_page *req)
{
- dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
@@ -274,7 +381,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
hdr->release(hdr);
}
@@ -307,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t rsize = NFS_SERVER(inode)->rsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ struct iov_iter *iter,
+ loff_t pos)
+{
+ struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
+ ssize_t result = -EINVAL;
+ size_t requested_bytes = 0;
+ size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *),
- GFP_KERNEL);
- if (!pagevec)
- break;
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 1, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 1, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
+ nfs_pageio_init_read(&desc, dreq->inode, false,
+ &nfs_direct_read_completion_ops);
+ get_dreq(dreq);
+ desc.pg_dreq = dreq;
+ atomic_inc(&inode->i_dio_count);
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ rsize, &pgbase);
+ if (result < 0)
+ break;
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -374,54 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
}
req->wb_index = pos >> PAGE_SHIFT;
req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
nfs_release_request(req);
break;
}
pgbase = 0;
bytes -= req_len;
- started += req_len;
- user_addr += req_len;
+ requested_bytes += req_len;
pos += req_len;
- count -= req_len;
dreq->bytes_left -= req_len;
}
- /* The nfs_page now hold references to these pages */
nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
-{
- struct nfs_pageio_descriptor desc;
- ssize_t result = -EINVAL;
- size_t requested_bytes = 0;
- unsigned long seg;
-
- NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
- &nfs_direct_read_completion_ops);
- get_dreq(dreq);
- desc.pg_dreq = dreq;
-
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+ kvfree(pagevec);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
- break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -431,29 +481,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
+ inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
return 0;
}
-static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @iter: vector of user buffers into which to read data
+ * @pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file. For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change. Our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ *
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
+ ssize_t result = -EINVAL;
+ size_t count = iov_iter_count(iter);
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
+
+ dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
+ file, count, (long long) pos);
+
+ result = 0;
+ if (!count)
+ goto out;
+
+ mutex_lock(&inode->i_mutex);
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+ task_io_account_read(count);
+
+ result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
- goto out;
+ goto out_unlock;
dreq->inode = inode;
- dreq->bytes_left = iov_length(iov, nr_segs);
+ dreq->bytes_left = count;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
@@ -464,22 +554,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- NFS_I(inode)->read_io += iov_length(iov, nr_segs);
- result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
+ NFS_I(inode)->read_io += count;
+ result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
result = nfs_direct_wait(dreq);
+ if (result > 0)
+ iocb->ki_pos = pos + result;
+ }
+
+ nfs_direct_req_release(dreq);
+ return result;
+
out_release:
nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
return result;
}
-static void nfs_inode_dio_write_done(struct inode *inode)
-{
- nfs_zap_mapping(inode, inode->i_mapping);
- inode_dio_done(inode);
-}
-
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
@@ -498,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
dreq->count = 0;
get_dreq(dreq);
- NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
@@ -537,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
dprintk("NFS: %5u commit failed with error %d.\n",
data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
@@ -595,8 +691,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_inode_dio_write_done(dreq->inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
}
@@ -612,114 +707,10 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_inode_dio_write_done(inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
#endif
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation. If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes. Write length accounting is
- * handled automatically by nfs_direct_write_result(). Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t wsize = NFS_SERVER(inode)->wsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
-
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
- if (!pagevec)
- break;
-
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 0, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 0, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
-
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
-
- for (i = 0; i < npages; i++) {
- struct nfs_page *req;
- unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
- pgbase, req_len);
- if (IS_ERR(req)) {
- result = PTR_ERR(req);
- break;
- }
- nfs_lock_request(req);
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
- nfs_unlock_and_release_request(req);
- break;
- }
- pgbase = 0;
- bytes -= req_len;
- started += req_len;
- user_addr += req_len;
- pos += req_len;
- count -= req_len;
- dreq->bytes_left -= req_len;
- }
- /* The nfs_page now hold references to these pages */
- nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
@@ -749,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
bit = NFS_IOHDR_NEED_RESCHED;
else if (dreq->flags == 0) {
- memcpy(&dreq->verf, hdr->verf,
- sizeof(dreq->verf));
+ nfs_direct_set_hdr_verf(dreq, hdr);
bit = NFS_IOHDR_NEED_COMMIT;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ dreq->flags =
+ NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else
bit = NFS_IOHDR_NEED_COMMIT;
@@ -765,6 +756,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
+
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
switch (bit) {
@@ -799,33 +791,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.completion = nfs_direct_write_completion,
};
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation. If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes. Write length accounting is
+ * handled automatically by nfs_direct_write_result(). Otherwise, if
+ * no requests have been sent, just return an error.
+ */
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
+ struct iov_iter *iter,
+ loff_t pos)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
ssize_t result = 0;
size_t requested_bytes = 0;
- unsigned long seg;
+ size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
- NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+ nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
atomic_inc(&inode->i_dio_count);
- NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+ NFS_I(inode)->write_io += iov_iter_count(iter);
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ wsize, &pgbase);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ requested_bytes += req_len;
+ pos += req_len;
+ dreq->bytes_left -= req_len;
+ }
+ nfs_direct_release_pages(pagevec, npages);
+ kvfree(pagevec);
+ if (result < 0)
break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -844,100 +880,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
return 0;
}
-static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos,
- size_t count, bool uio)
-{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct nfs_direct_req *dreq;
- struct nfs_lock_context *l_ctx;
-
- dreq = nfs_direct_req_alloc();
- if (!dreq)
- goto out;
-
- dreq->inode = inode;
- dreq->bytes_left = count;
- dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- l_ctx = nfs_get_lock_context(dreq->ctx);
- if (IS_ERR(l_ctx)) {
- result = PTR_ERR(l_ctx);
- goto out_release;
- }
- dreq->l_ctx = l_ctx;
- if (!is_sync_kiocb(iocb))
- dreq->iocb = iocb;
-
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
- result = nfs_direct_wait(dreq);
-out_release:
- nfs_direct_req_release(dreq);
-out:
- return result;
-}
-
-/**
- * nfs_file_direct_read - file direct read operation for NFS files
- * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
- * @pos: byte offset in file where reading starts
- *
- * We use this function for direct reads instead of calling
- * generic_file_aio_read() in order to avoid gfar's check to see if
- * the request starts before the end of the file. For that check
- * to work, we must generate a GETATTR before each direct read, and
- * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. Our preference is simply
- * to do all reads the application wants, and the server will take
- * care of managing the end of file boundary.
- *
- * This function also eliminates unnecessarily updating the file's
- * atime locally, as the NFS server sets the file's atime, and this
- * client must read the updated atime from the server back into its
- * cache.
- */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
-{
- ssize_t retval = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- size_t count;
-
- count = iov_length(iov, nr_segs);
- nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
-
- dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- count, (long long) pos);
-
- retval = 0;
- if (!count)
- goto out;
-
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
-
- task_io_account_read(count);
-
- retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
- if (retval > 0)
- iocb->ki_pos = pos + retval;
-
-out:
- return retval;
-}
-
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -955,51 +901,97 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t retval = -EINVAL;
+ ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count;
+ struct inode *inode = mapping->host;
+ struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
+ loff_t end;
+ size_t count = iov_iter_count(iter);
+ end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
- count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
- dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- count, (long long) pos);
+ dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
+ file, count, (long long) pos);
- retval = generic_write_checks(file, &pos, &count, 0);
- if (retval)
+ result = generic_write_checks(file, &pos, &count, 0);
+ if (result)
goto out;
- retval = -EINVAL;
+ result = -EINVAL;
if ((ssize_t) count < 0)
goto out;
- retval = 0;
+ result = 0;
if (!count)
goto out;
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
+ mutex_lock(&inode->i_mutex);
+
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+
+ if (mapping->nrpages) {
+ result = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
+ if (result)
+ goto out_unlock;
+ }
task_io_account_write(count);
- retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
- if (retval > 0) {
- struct inode *inode = mapping->host;
+ result = -ENOMEM;
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ goto out_unlock;
- iocb->ki_pos = pos + retval;
- spin_lock(&inode->i_lock);
- if (i_size_read(inode) < iocb->ki_pos)
- i_size_write(inode, iocb->ki_pos);
- spin_unlock(&inode->i_lock);
+ dreq->inode = inode;
+ dreq->bytes_left = count;
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
+ goto out_release;
+ }
+ dreq->l_ctx = l_ctx;
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+
+ if (mapping->nrpages) {
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
}
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
+ result = nfs_direct_wait(dreq);
+ if (result > 0) {
+ struct inode *inode = mapping->host;
+
+ iocb->ki_pos = pos + result;
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
+ }
+ nfs_direct_req_release(dreq);
+ return result;
+
+out_release:
+ nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
- return retval;
+ return result;
}
/**
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index ca4b11ec87a..d25f10fb492 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/dns_resolver.h>
#include "dns_resolve.h"
@@ -28,7 +29,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
kfree(ip_addr);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
#else
@@ -42,10 +42,13 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
#include <linux/seq_file.h>
#include <linux/inet.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
#include "dns_resolve.h"
#include "cache_lib.h"
#include "netns.h"
@@ -142,7 +145,7 @@ static int nfs_dns_upcall(struct cache_detail *cd,
ret = nfs_cache_upcall(cd, key->hostname);
if (ret)
- ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
+ ret = sunrpc_cache_pipe_upcall(cd, ch);
return ret;
}
@@ -349,64 +352,65 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
ret = -ESRCH;
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
+
+static struct cache_detail nfs_dns_resolve_template = {
+ .owner = THIS_MODULE,
+ .hash_size = NFS_DNS_HASHTBL_SIZE,
+ .name = "dns_resolve",
+ .cache_put = nfs_dns_ent_put,
+ .cache_upcall = nfs_dns_upcall,
+ .cache_request = nfs_dns_request,
+ .cache_parse = nfs_dns_parse,
+ .cache_show = nfs_dns_show,
+ .match = nfs_dns_match,
+ .init = nfs_dns_ent_init,
+ .update = nfs_dns_ent_update,
+ .alloc = nfs_dns_ent_alloc,
+};
+
int nfs_dns_resolver_cache_init(struct net *net)
{
- int err = -ENOMEM;
+ int err;
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct cache_detail *cd;
- struct cache_head **tbl;
- cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
- if (cd == NULL)
- goto err_cd;
-
- tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
- GFP_KERNEL);
- if (tbl == NULL)
- goto err_tbl;
-
- cd->owner = THIS_MODULE,
- cd->hash_size = NFS_DNS_HASHTBL_SIZE,
- cd->hash_table = tbl,
- cd->name = "dns_resolve",
- cd->cache_put = nfs_dns_ent_put,
- cd->cache_upcall = nfs_dns_upcall,
- cd->cache_parse = nfs_dns_parse,
- cd->cache_show = nfs_dns_show,
- cd->match = nfs_dns_match,
- cd->init = nfs_dns_ent_init,
- cd->update = nfs_dns_ent_update,
- cd->alloc = nfs_dns_ent_alloc,
-
- nfs_cache_init(cd);
- err = nfs_cache_register_net(net, cd);
+ nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net);
+ if (IS_ERR(nn->nfs_dns_resolve))
+ return PTR_ERR(nn->nfs_dns_resolve);
+
+ err = nfs_cache_register_net(net, nn->nfs_dns_resolve);
if (err)
goto err_reg;
- nn->nfs_dns_resolve = cd;
return 0;
err_reg:
- nfs_cache_destroy(cd);
- kfree(cd->hash_table);
-err_tbl:
- kfree(cd);
-err_cd:
+ cache_destroy_net(nn->nfs_dns_resolve, net);
return err;
}
void nfs_dns_resolver_cache_destroy(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct cache_detail *cd = nn->nfs_dns_resolve;
- nfs_cache_unregister_net(net, cd);
- nfs_cache_destroy(cd);
- kfree(cd->hash_table);
- kfree(cd);
+ nfs_cache_unregister_net(net, nn->nfs_dns_resolve);
+ cache_destroy_net(nn->nfs_dns_resolve, net);
}
+static int nfs4_dns_net_init(struct net *net)
+{
+ return nfs_dns_resolver_cache_init(net);
+}
+
+static void nfs4_dns_net_exit(struct net *net)
+{
+ nfs_dns_resolver_cache_destroy(net);
+}
+
+static struct pernet_operations nfs4_dns_resolver_ops = {
+ .init = nfs4_dns_net_init,
+ .exit = nfs4_dns_net_exit,
+};
+
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
@@ -443,11 +447,24 @@ static struct notifier_block nfs_dns_resolver_block = {
int nfs_dns_resolver_init(void)
{
- return rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
+ int err;
+
+ err = register_pernet_subsys(&nfs4_dns_resolver_ops);
+ if (err < 0)
+ goto out;
+ err = rpc_pipefs_notifier_register(&nfs_dns_resolver_block);
+ if (err < 0)
+ goto out1;
+ return 0;
+out1:
+ unregister_pernet_subsys(&nfs4_dns_resolver_ops);
+out:
+ return err;
}
void nfs_dns_resolver_destroy(void)
{
rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block);
+ unregister_pernet_subsys(&nfs4_dns_resolver_ops);
}
#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3c2b893665b..4042ff58fe3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -37,6 +37,8 @@
#include "iostat.h"
#include "fscache.h"
+#include "nfstrace.h"
+
#define NFSDBG_FACILITY NFSDBG_FILE
static const struct vm_operations_struct nfs_file_vm_ops;
@@ -63,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
{
int res;
- dprintk("NFS: open file(%s/%s)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name);
+ dprintk("NFS: open file(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
res = nfs_check_flags(filp->f_flags);
@@ -79,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
int
nfs_file_release(struct inode *inode, struct file *filp)
{
- dprintk("NFS: release(%s/%s)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name);
+ dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
@@ -121,10 +119,8 @@ force_reval:
loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
- dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name,
- offset, whence);
+ dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
+ filp, offset, whence);
/*
* whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
@@ -148,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek);
int
nfs_file_flush(struct file *file, fl_owner_t id)
{
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(file);
- dprintk("NFS: flush(%s/%s)\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dprintk("NFS: flush(%pD2)\n", file);
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
if ((file->f_mode & FMODE_WRITE) == 0)
@@ -172,23 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id)
EXPORT_SYMBOL_GPL(nfs_file_flush);
ssize_t
-nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
- struct dentry * dentry = iocb->ki_filp->f_path.dentry;
- struct inode * inode = dentry->d_inode;
+ struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+ return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
- dprintk("NFS: read(%s/%s, %lu@%lu)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+ dprintk("NFS: read(%pD2, %zu@%lu)\n",
+ iocb->ki_filp,
+ iov_iter_count(to), (unsigned long) iocb->ki_pos);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
- result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ result = generic_file_read_iter(iocb, to);
if (result > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
}
@@ -201,13 +192,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
ssize_t res;
- dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long long) *ppos);
+ dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
+ filp, (unsigned long) count, (unsigned long long) *ppos);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (!res) {
@@ -222,12 +211,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(file);
int status;
- dprintk("NFS: mmap(%s/%s)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dprintk("NFS: mmap(%pD2)\n", file);
/* Note: generic_file_mmap() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
@@ -256,15 +243,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
int
nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(file);
int have_error, do_resend, status;
int ret = 0;
- dprintk("NFS: fsync file(%s/%s) datasync %d\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- datasync);
+ dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
@@ -292,7 +276,9 @@ static int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
+
+ trace_nfs_fsync_enter(inode);
do {
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -310,6 +296,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
end = LLONG_MAX;
} while (ret == -EAGAIN);
+ trace_nfs_fsync_exit(inode, ret);
return ret;
}
@@ -366,10 +353,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
int once_thru = 0;
- dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- mapping->host->i_ino, len, (long long) pos);
+ dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
+ file, mapping->host->i_ino, len, (long long) pos);
start:
/*
@@ -406,12 +391,11 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
- dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- mapping->host->i_ino, len, (long long) pos);
+ dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
+ file, mapping->host->i_ino, len, (long long) pos);
/*
* Zero any uninitialised parts of the page, and then mark the page
@@ -441,6 +425,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
if (status < 0)
return status;
NFS_I(mapping->host)->write_io += copied;
+
+ if (nfs_ctx_key_to_expire(ctx)) {
+ status = nfs_wb_all(mapping->host);
+ if (status < 0)
+ return status;
+ }
+
return copied;
}
@@ -451,11 +442,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
*/
-static void nfs_invalidate_page(struct page *page, unsigned long offset)
+static void nfs_invalidate_page(struct page *page, unsigned int offset,
+ unsigned int length)
{
- dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset);
+ dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
+ page, offset, length);
- if (offset != 0)
+ if (offset != 0 || length < PAGE_CACHE_SIZE)
return;
/* Cancel any unstarted writes on this page */
nfs_wb_page_cancel(page_file_mapping(page)->host, page);
@@ -493,6 +486,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
return nfs_fscache_release_page(page, gfp);
}
+static void nfs_check_dirty_writeback(struct page *page,
+ bool *dirty, bool *writeback)
+{
+ struct nfs_inode *nfsi;
+ struct address_space *mapping = page_file_mapping(page);
+
+ if (!mapping || PageSwapCache(page))
+ return;
+
+ /*
+ * Check if an unstable page is currently being committed and
+ * if so, have the VM treat it as if the page is under writeback
+ * so it will not block due to pages that will shortly be freeable.
+ */
+ nfsi = NFS_I(mapping->host);
+ if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+ *writeback = true;
+ return;
+ }
+
+ /*
+ * If PagePrivate() is set, then the page is not freeable and as the
+ * inode is not being committed, it's not going to be cleaned in the
+ * near future so treat it as dirty
+ */
+ if (PagePrivate(page))
+ *dirty = true;
+}
+
/*
* Attempt to clear the private state associated with a page when an error
* occurs that requires the cached contents of an inode to be written back or
@@ -540,6 +562,7 @@ const struct address_space_operations nfs_file_aops = {
.direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
+ .is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
#ifdef CONFIG_NFS_SWAP
.swap_activate = nfs_swap_activate,
@@ -556,22 +579,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *filp = vma->vm_file;
- struct dentry *dentry = filp->f_path.dentry;
+ struct inode *inode = file_inode(filp);
unsigned pagelen;
int ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
- dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- filp->f_mapping->host->i_ino,
+ dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
+ filp, filp->f_mapping->host->i_ino,
(long long)page_offset(page));
/* make sure the cache has finished storing the page */
- nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ nfs_fscache_wait_on_page_write(NFS_I(inode), page);
lock_page(page);
mapping = page_file_mapping(page);
- if (mapping != dentry->d_inode->i_mapping)
+ if (mapping != inode->i_mapping)
goto out_unlock;
wait_on_page_writeback(page);
@@ -594,6 +616,7 @@ out:
static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
@@ -605,26 +628,30 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
return 1;
ctx = nfs_file_open_context(filp);
- if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
+ if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
+ nfs_ctx_key_to_expire(ctx))
return 1;
return 0;
}
-ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
- struct dentry * dentry = iocb->ki_filp->f_path.dentry;
- struct inode * inode = dentry->d_inode;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
unsigned long written = 0;
ssize_t result;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(from);
+ loff_t pos = iocb->ki_pos;
- if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+ result = nfs_key_timeout_notify(file, inode);
+ if (result)
+ return result;
+
+ if (file->f_flags & O_DIRECT)
+ return nfs_file_direct_write(iocb, from, pos, true);
- dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (long long) pos);
+ dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+ file, count, (long long) pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -632,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
/*
* O_APPEND implies that we must revalidate the file length.
*/
- if (iocb->ki_filp->f_flags & O_APPEND) {
- result = nfs_revalidate_file_size(inode, iocb->ki_filp);
+ if (file->f_flags & O_APPEND) {
+ result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
}
@@ -642,13 +669,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
if (!count)
goto out;
- result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ result = generic_file_write_iter(iocb, from);
if (result > 0)
written = result;
/* Return error values for O_DSYNC and IS_SYNC() */
- if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
- int err = vfs_fsync(iocb->ki_filp, 0);
+ if (result >= 0 && nfs_need_sync_write(file, inode)) {
+ int err = vfs_fsync(file, 0);
if (err < 0)
result = err;
}
@@ -663,38 +690,6 @@ out_swapfile:
}
EXPORT_SYMBOL_GPL(nfs_file_write);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
- struct file *filp, loff_t *ppos,
- size_t count, unsigned int flags)
-{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- unsigned long written = 0;
- ssize_t ret;
-
- dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (unsigned long) count, (unsigned long long) *ppos);
-
- /*
- * The combination of splice and an O_APPEND destination is disallowed.
- */
-
- ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
- if (ret > 0)
- written = ret;
-
- if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
- int err = vfs_fsync(filp, 0);
- if (err < 0)
- ret = err;
- }
- if (ret > 0)
- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_write);
-
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
@@ -744,6 +739,7 @@ static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
+ struct nfs_lock_context *l_ctx;
int status;
/*
@@ -752,6 +748,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
*/
nfs_sync_mapping(filp->f_mapping);
+ l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
+ if (!IS_ERR(l_ctx)) {
+ status = nfs_iocounter_wait(&l_ctx->io_count);
+ nfs_put_lock_context(l_ctx);
+ if (status < 0)
+ return status;
+ }
+
/* NOTE: special case
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
@@ -824,10 +828,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
int ret = -ENOLCK;
int is_local = 0;
- dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name,
- fl->fl_type, fl->fl_flags,
+ dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
+ filp, fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
@@ -864,10 +866,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
struct inode *inode = filp->f_mapping->host;
int is_local = 0;
- dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
- filp->f_path.dentry->d_parent->d_name.name,
- filp->f_path.dentry->d_name.name,
- fl->fl_type, fl->fl_flags);
+ dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
+ filp, fl->fl_type, fl->fl_flags);
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
@@ -885,10 +885,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
- fl->fl_owner = (fl_owner_t)filp;
- fl->fl_start = 0;
- fl->fl_end = OFFSET_MAX;
-
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
@@ -901,19 +897,17 @@ EXPORT_SYMBOL_GPL(nfs_flock);
*/
int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
- dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name, arg);
+ dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg);
return -EINVAL;
}
EXPORT_SYMBOL_GPL(nfs_setlease);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = nfs_file_read,
+ .write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
@@ -922,7 +916,7 @@ const struct file_operations nfs_file_operations = {
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
- .splice_write = nfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
};
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 00000000000..8516cdffb9e
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index 194c4841033..d2eba1c13b7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,10 +35,11 @@
#include <linux/sunrpc/metrics.h>
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -83,43 +84,45 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
- "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
- (long long)NFS_FILEID(hdr->inode),
+ (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
&hdr->pages,
- hdr->completion_ops);
+ hdr->completion_ops,
+ hdr->dreq);
}
}
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
- "(req %s/%lld, %u bytes @ offset %llu)\n", __func__,
+ "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid,
hdr->inode->i_sb->s_id,
- (long long)NFS_FILEID(hdr->inode),
+ (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count,
(unsigned long long)data->args.offset);
task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
&hdr->pages,
- hdr->completion_ops);
+ hdr->completion_ops,
+ hdr->dreq);
}
}
@@ -127,7 +130,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
{
if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return;
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
pnfs_return_layout(inode);
}
@@ -157,11 +159,14 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_schedule_stateid_recovery(mds_server, state);
+ if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
+ goto out_bad_stateid;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
- if (state != NULL)
- nfs4_schedule_stateid_recovery(mds_server, state);
+ if (state != NULL) {
+ if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
+ goto out_bad_stateid;
+ }
nfs4_schedule_lease_recovery(mds_client);
goto wait_on_recovery;
/* DS session errors */
@@ -225,6 +230,9 @@ reset:
out:
task->tk_status = 0;
return -EAGAIN;
+out_bad_stateid:
+ task->tk_status = -EIO;
+ return 0;
wait_on_recovery:
rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
@@ -235,11 +243,12 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
+ trace_nfs4_pnfs_read(data, task->tk_status);
err = filelayout_async_handle_error(task, data->args.context->state,
data->ds_clp, hdr->lseg);
@@ -261,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used.
*/
static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
@@ -270,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
return;
pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
@@ -296,31 +305,41 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
+ rpc_exit(task, -EIO);
+ return;
+ }
if (filelayout_reset_to_mds(rdata->header->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_read(rdata);
rpc_exit(task, 0);
return;
}
- rdata->read_done_cb = filelayout_read_done_cb;
+ rdata->pgio_done_cb = filelayout_read_done_cb;
- nfs41_setup_sequence(rdata->ds_clp->cl_session,
+ if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args,
&rdata->res.seq_res,
- task);
+ task))
+ return;
+ if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
+ rdata->args.lock_context, FMODE_READ) == -EIO)
+ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ nfs41_sequence_done(task, &rdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
rdata->header->mds_ops->rpc_call_done(task, data);
@@ -328,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -344,11 +363,12 @@ static void filelayout_read_release(void *data)
}
static int filelayout_write_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
+ trace_nfs4_pnfs_write(data, task->tk_status);
err = filelayout_async_handle_error(task, data->args.context->state,
data->ds_clp, hdr->lseg);
@@ -381,6 +401,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
{
int err;
+ trace_nfs4_pnfs_commit_ds(data, task->tk_status);
err = filelayout_async_handle_error(task, NULL, data->ds_clp,
data->lseg);
@@ -398,27 +419,37 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
+ rpc_exit(task, -EIO);
+ return;
+ }
if (filelayout_reset_to_mds(wdata->header->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_write(wdata);
rpc_exit(task, 0);
return;
}
- nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args,
&wdata->res.seq_res,
- task);
+ task))
+ return;
+ if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
+ wdata->args.lock_context, FMODE_WRITE) == -EIO)
+ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
- task->tk_status == 0)
+ task->tk_status == 0) {
+ nfs41_sequence_done(task, &wdata->res.seq_res);
return;
+ }
/* Note this may cause RPC to be resent */
wdata->header->mds_ops->rpc_call_done(task, data);
@@ -426,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -498,11 +529,12 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};
static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
+ struct rpc_clnt *ds_clnt;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
@@ -517,12 +549,18 @@ filelayout_read_pagelist(struct nfs_read_data *data)
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds)
return PNFS_NOT_ATTEMPTED;
+
+ ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
+ if (IS_ERR(ds_clnt))
+ return PNFS_NOT_ATTEMPTED;
+
dprintk("%s USE DS: %s cl_count %d\n", __func__,
ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
/* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
@@ -531,18 +569,19 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
- nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
- &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+ nfs_initiate_pgio(ds_clnt, data,
+ &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
+ struct rpc_clnt *ds_clnt;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
@@ -553,24 +592,27 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds)
return PNFS_NOT_ATTEMPTED;
+
+ ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
+ if (IS_ERR(ds_clnt))
+ return PNFS_NOT_ATTEMPTED;
+
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
- data->write_done_cb = filelayout_write_done_cb;
+ data->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
- /*
- * Get the file offset on the dserver. Set the write offset to
- * this offset and save the original offset.
- */
+
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+ nfs_initiate_pgio(ds_clnt, data,
&filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
@@ -594,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_deviceid_node *d;
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
- struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
dprintk("--> %s\n", __func__);
@@ -612,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
- if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+ if (!fl->stripe_unit) {
dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit);
goto out;
@@ -622,7 +663,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
NFS_SERVER(lo->plh_inode)->nfs_client, id);
if (d == NULL) {
- dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags);
+ dsaddr = filelayout_get_device_info(lo->plh_inode, id,
+ lo->plh_lc_cred, gfp_flags);
if (dsaddr == NULL)
goto out;
} else
@@ -648,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out_put;
}
- if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
- dprintk("%s Stripe unit (%u) not aligned with rsize %u "
- "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
- nfss->wsize);
- }
-
status = 0;
out:
dprintk("--> %s returns %d\n", __func__, status);
@@ -806,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
struct pnfs_commit_bucket *buckets;
- int size;
+ int size, i;
if (fl->commit_through_mds)
return 0;
- if (cinfo->ds->nbuckets != 0) {
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ if (cinfo->ds->nbuckets >= size) {
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of <multipath_list4, fh> keys, each
@@ -820,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
return 0;
}
- size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
gfp_flags);
if (!buckets)
return -ENOMEM;
- else {
- int i;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ /* mark direct verifier as unset */
+ buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ }
- spin_lock(cinfo->lock);
- if (cinfo->ds->nbuckets != 0)
- kfree(buckets);
- else {
- cinfo->ds->buckets = buckets;
- cinfo->ds->nbuckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&buckets[i].written);
- INIT_LIST_HEAD(&buckets[i].committing);
- }
- }
- spin_unlock(cinfo->lock);
- return 0;
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets >= size)
+ goto out;
+ for (i = 0; i < cinfo->ds->nbuckets; i++) {
+ list_splice(&cinfo->ds->buckets[i].written,
+ &buckets[i].written);
+ list_splice(&cinfo->ds->buckets[i].committing,
+ &buckets[i].committing);
+ buckets[i].direct_verf.committed =
+ cinfo->ds->buckets[i].direct_verf.committed;
+ buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+ buckets[i].clseg = cinfo->ds->buckets[i].clseg;
}
+ swap(cinfo->ds->buckets, buckets);
+ cinfo->ds->nbuckets = size;
+out:
+ spin_unlock(cinfo->lock);
+ kfree(buckets);
+ return 0;
}
static struct pnfs_layout_segment *
@@ -871,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
- * return true : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
-static bool
+static size_t
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
+ unsigned int size;
u64 p_stripe, r_stripe;
- u32 stripe_unit;
+ u32 stripe_offset;
+ u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+ u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
- if (!pnfs_generic_pg_test(pgio, prev, req) ||
- !nfs_generic_pg_test(pgio, prev, req))
- return false;
+ /* calls nfs_generic_pg_test */
+ size = pnfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
- p_stripe = (u64)req_offset(prev);
- r_stripe = (u64)req_offset(req);
- stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+ /* see if req and prev are in the same stripe */
+ if (prev) {
+ p_stripe = (u64)req_offset(prev) - segment_offset;
+ r_stripe = (u64)req_offset(req) - segment_offset;
+ do_div(p_stripe, stripe_unit);
+ do_div(r_stripe, stripe_unit);
- do_div(p_stripe, stripe_unit);
- do_div(r_stripe, stripe_unit);
+ if (p_stripe != r_stripe)
+ return 0;
+ }
- return (p_stripe == r_stripe);
+ /* calculate remaining bytes in the current stripe */
+ div_u64_rem((u64)req_offset(req) - segment_offset,
+ stripe_unit,
+ &stripe_offset);
+ WARN_ON_ONCE(stripe_offset > stripe_unit);
+ if (stripe_offset >= stripe_unit)
+ return 0;
+ return min(stripe_unit - (unsigned int)stripe_offset, size);
}
static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase) {
- /*
- * Handling unaligned pages is difficult, because have to
- * somehow split a req in two in certain cases in the
- * pg.test code. Avoid this by just not using pnfs
- * in this case.
- */
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -929,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase)
- goto out_mds;
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -1023,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
*/
j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
+ spin_lock(cinfo->lock);
buckets = cinfo->ds->buckets;
list = &buckets[i].written;
if (list_empty(list)) {
@@ -1036,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
+ spin_unlock(cinfo->lock);
return list;
}
@@ -1079,16 +1128,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
+ struct rpc_clnt *ds_clnt;
u32 idx;
struct nfs_fh *fh;
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
ds = nfs4_fl_prepare_ds(lseg, idx);
- if (!ds) {
- prepare_to_resend_writes(data);
- filelayout_commit_release(data);
- return -EAGAIN;
- }
+ if (!ds)
+ goto out_err;
+
+ ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode);
+ if (IS_ERR(ds_clnt))
+ goto out_err;
+
dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
data->commit_done_cb = filelayout_commit_done_cb;
@@ -1097,9 +1149,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
- return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
+ return nfs_initiate_commit(ds_clnt, data,
&filelayout_commit_call_ops, how,
RPC_TASK_SOFTCONN);
+out_err:
+ prepare_to_resend_writes(data);
+ filelayout_commit_release(data);
+ return -EAGAIN;
}
static int
@@ -1125,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
return ret;
}
+/* Note called with cinfo->lock held. */
static int
filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct nfs_commit_info *cinfo,
@@ -1169,19 +1226,22 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo)
{
struct pnfs_commit_bucket *b;
+ struct pnfs_layout_segment *freeme;
int i;
- /* NOTE cinfo->lock is NOT held, relying on fact that this is
- * only called on single thread per dreq.
- * Can't take the lock because need to do pnfs_put_lseg
- */
+restart:
+ spin_lock(cinfo->lock);
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
- pnfs_put_lseg(b->wlseg);
+ freeme = b->wlseg;
b->wlseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
+ goto restart;
}
}
cinfo->ds->nwritten = 0;
+ spin_unlock(cinfo->lock);
}
static unsigned int
@@ -1192,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;
+ struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
@@ -1202,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (!data)
break;
data->ds_commit_index = i;
+ spin_lock(cinfo->lock);
data->lseg = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
list_add(&data->pages, list);
nreq++;
}
@@ -1213,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (list_empty(&bucket->committing))
continue;
nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- pnfs_put_lseg(bucket->clseg);
+ spin_lock(cinfo->lock);
+ freeme = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
}
/* Caller will clean up entries put on list */
return nreq;
@@ -1279,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return &flo->generic_hdr;
+ return flo != NULL ? &flo->generic_hdr : NULL;
}
static void
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index 8c07241fe52..ffbddf2219e 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,13 +30,13 @@
#ifndef FS_NFS_NFS4FILELAYOUT_H
#define FS_NFS_NFS4FILELAYOUT_H
-#include "pnfs.h"
+#include "../pnfs.h"
/*
* Default data server connection timeout and retrans vaules.
* Set by module paramters dataserver_timeo and dataserver_retrans.
*/
-#define NFS4_DEF_DS_TIMEO 60
+#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */
#define NFS4_DEF_DS_RETRANS 5
/*
@@ -70,6 +70,8 @@ struct nfs4_pnfs_ds {
struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
+ unsigned long ds_state;
+#define NFS4DS_CONNECTING 0 /* ds is establishing connection */
};
struct nfs4_file_layout_dsaddr {
@@ -148,6 +150,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
-filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id,
+ struct rpc_cred *cred, gfp_t gfp_flags);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index b720064bcd7..44bf0140a4c 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -31,10 +31,11 @@
#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
+#include <linux/sunrpc/addr.h>
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -94,7 +95,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
b6 = (struct sockaddr_in6 *)addr2;
/* LINKLOCAL addresses must have matching scope_id */
- if (ipv6_addr_scope(&a6->sin6_addr) ==
+ if (ipv6_addr_src_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
return false;
@@ -184,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
if (status)
goto out_put;
+ smp_wmb();
ds->ds_clp = clp;
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
@@ -667,7 +669,10 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
* of available devices, and return it.
*/
struct nfs4_file_layout_dsaddr *
-filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
+filelayout_get_device_info(struct inode *inode,
+ struct nfs4_deviceid *dev_id,
+ struct rpc_cred *cred,
+ gfp_t gfp_flags)
{
struct pnfs_device *pdev = NULL;
u32 max_resp_sz;
@@ -707,8 +712,9 @@ filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gf
pdev->pgbase = 0;
pdev->pglen = max_resp_sz;
pdev->mincount = 0;
+ pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead;
- rc = nfs4_proc_getdeviceinfo(server, pdev);
+ rc = nfs4_proc_getdeviceinfo(server, pdev, cred);
dprintk("%s getdevice info returns %d\n", __func__, rc);
if (rc)
goto out_free;
@@ -774,34 +780,57 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
return flseg->fh_array[i];
}
+static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
+{
+ might_sleep();
+ wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+}
+
+static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
+{
+ smp_mb__before_atomic();
+ clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
+ smp_mb__after_atomic();
+ wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
+}
+
+
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-
- if (filelayout_test_devid_unavailable(devid))
- return NULL;
+ struct nfs4_pnfs_ds *ret = ds;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
filelayout_mark_devid_invalid(devid);
- return NULL;
+ goto out;
}
+ smp_rmb();
+ if (ds->ds_clp)
+ goto out_test_devid;
- if (!ds->ds_clp) {
+ if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
err = nfs4_ds_connect(s, ds);
- if (err) {
+ if (err)
nfs4_mark_deviceid_unavailable(devid);
- return NULL;
- }
+ nfs4_clear_ds_conn_bit(ds);
+ } else {
+ /* Either ds is connected, or ds->ds_clp is NULL */
+ nfs4_wait_ds_connect(ds);
}
- return ds;
+out_test_devid:
+ if (filelayout_test_devid_unavailable(devid))
+ ret = NULL;
+out:
+ return ret;
}
module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 24d1d1c5fca..3ef01f0ba0b 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp)
/* create a cache index for looking up filehandles */
clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
&nfs_fscache_server_index_def,
- clp);
+ clp, true);
dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
clp, clp->fscache);
}
@@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
/* create a cache index for looking up filehandles */
nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
&nfs_fscache_super_index_def,
- nfss);
+ nfss, true);
dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
nfss, nfss->fscache);
return;
@@ -178,163 +178,79 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
/*
* Initialise the per-inode cache cookie pointer for an NFS inode.
*/
-void nfs_fscache_init_inode_cookie(struct inode *inode)
+void nfs_fscache_init_inode(struct inode *inode)
{
- NFS_I(inode)->fscache = NULL;
- if (S_ISREG(inode->i_mode))
- set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
-}
-
-/*
- * Get the per-inode cache cookie for an NFS inode.
- */
-static void nfs_fscache_enable_inode_cookie(struct inode *inode)
-{
- struct super_block *sb = inode->i_sb;
struct nfs_inode *nfsi = NFS_I(inode);
- if (nfsi->fscache || !NFS_FSCACHE(inode))
+ nfsi->fscache = NULL;
+ if (!S_ISREG(inode->i_mode))
return;
-
- if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) {
- nfsi->fscache = fscache_acquire_cookie(
- NFS_SB(sb)->fscache,
- &nfs_fscache_inode_object_def,
- nfsi);
-
- dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
- sb, nfsi, nfsi->fscache);
- }
+ nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
+ &nfs_fscache_inode_object_def,
+ nfsi, false);
}
/*
* Release a per-inode cookie.
*/
-void nfs_fscache_release_inode_cookie(struct inode *inode)
+void nfs_fscache_clear_inode(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ struct fscache_cookie *cookie = nfs_i_fscache(inode);
- dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
- nfsi, nfsi->fscache);
+ dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
- fscache_relinquish_cookie(nfsi->fscache, 0);
+ fscache_relinquish_cookie(cookie, false);
nfsi->fscache = NULL;
}
-/*
- * Retire a per-inode cookie, destroying the data attached to it.
- */
-void nfs_fscache_zap_inode_cookie(struct inode *inode)
+static bool nfs_fscache_can_enable(void *data)
{
- struct nfs_inode *nfsi = NFS_I(inode);
+ struct inode *inode = data;
- dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n",
- nfsi, nfsi->fscache);
-
- fscache_relinquish_cookie(nfsi->fscache, 1);
- nfsi->fscache = NULL;
+ return !inode_is_open_for_write(inode);
}
/*
- * Turn off the cache with regard to a per-inode cookie if opened for writing,
- * invalidating all the pages in the page cache relating to the associated
- * inode to clear the per-page caching.
- */
-static void nfs_fscache_disable_inode_cookie(struct inode *inode)
-{
- clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
-
- if (NFS_I(inode)->fscache) {
- dfprintk(FSCACHE,
- "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
-
- /* Need to uncache any pages attached to this inode that
- * fscache knows about before turning off the cache.
- */
- fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode);
- nfs_fscache_zap_inode_cookie(inode);
- }
-}
-
-/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-static int nfs_fscache_wait_bit(void *flags)
-{
- schedule();
- return 0;
-}
-
-/*
- * Lock against someone else trying to also acquire or relinquish a cookie
- */
-static inline void nfs_fscache_inode_lock(struct inode *inode)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
-
- while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
- wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
- nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
-}
-
-/*
- * Unlock cookie management lock
- */
-static inline void nfs_fscache_inode_unlock(struct inode *inode)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
-
- smp_mb__before_clear_bit();
- clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
- smp_mb__after_clear_bit();
- wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
-}
-
-/*
- * Decide if we should enable or disable local caching for this inode.
- * - For now, with NFS, only regular files that are open read-only will be able
- * to use the cache.
- * - May be invoked multiple times in parallel by parallel nfs_open() functions.
- */
-void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
-{
- if (NFS_FSCACHE(inode)) {
- nfs_fscache_inode_lock(inode);
- if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
- nfs_fscache_disable_inode_cookie(inode);
- else
- nfs_fscache_enable_inode_cookie(inode);
- nfs_fscache_inode_unlock(inode);
- }
-}
-EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie);
-
-/*
- * Replace a per-inode cookie due to revalidation detecting a file having
- * changed on the server.
+ * Enable or disable caching for a file that is being opened as appropriate.
+ * The cookie is allocated when the inode is initialised, but is not enabled at
+ * that time. Enablement is deferred to file-open time to avoid stat() and
+ * access() thrashing the cache.
+ *
+ * For now, with NFS, only regular files that are open read-only will be able
+ * to use the cache.
+ *
+ * We enable the cache for an inode if we open it read-only and it isn't
+ * currently open for writing. We disable the cache if the inode is open
+ * for writing.
+ *
+ * The caller uses the file struct to pin i_writecount on the inode before
+ * calling us when a file is opened for writing, so we can make use of that.
+ *
+ * Note that this may be invoked multiple times in parallel by parallel
+ * nfs_open() functions.
*/
-void nfs_fscache_reset_inode_cookie(struct inode *inode)
+void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_server *nfss = NFS_SERVER(inode);
- NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache);
+ struct fscache_cookie *cookie = nfs_i_fscache(inode);
- nfs_fscache_inode_lock(inode);
- if (nfsi->fscache) {
- /* retire the current fscache cache and get a new one */
- fscache_relinquish_cookie(nfsi->fscache, 1);
-
- nfsi->fscache = fscache_acquire_cookie(
- nfss->nfs_client->fscache,
- &nfs_fscache_inode_object_def,
- nfsi);
+ if (!fscache_cookie_valid(cookie))
+ return;
- dfprintk(FSCACHE,
- "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
- nfss, nfsi, old, nfsi->fscache);
+ if (inode_is_open_for_write(inode)) {
+ dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
+ clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
+ fscache_disable_cookie(cookie, true);
+ fscache_uncache_all_inode_pages(cookie, inode);
+ } else {
+ dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
+ fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
+ if (fscache_cookie_enabled(cookie))
+ set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
}
- nfs_fscache_inode_unlock(inode);
}
+EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
/*
* Release the caching state associated with a page, if the page isn't busy
@@ -344,12 +260,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
if (PageFsCache(page)) {
- struct nfs_inode *nfsi = NFS_I(page->mapping->host);
- struct fscache_cookie *cookie = nfsi->fscache;
+ struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host);
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
- cookie, page, nfsi);
+ cookie, page, NFS_I(page->mapping->host));
if (!fscache_maybe_release_page(cookie, page, gfp))
return 0;
@@ -367,13 +282,12 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
*/
void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct fscache_cookie *cookie = nfsi->fscache;
+ struct fscache_cookie *cookie = nfs_i_fscache(inode);
BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
- cookie, page, nfsi);
+ cookie, page, NFS_I(inode));
fscache_wait_on_page_write(cookie, page);
@@ -417,9 +331,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
dfprintk(FSCACHE,
"NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
- NFS_I(inode)->fscache, page, page->index, page->flags, inode);
+ nfs_i_fscache(inode), page, page->index, page->flags, inode);
- ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
+ ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
page,
nfs_readpage_from_fscache_complete,
ctx,
@@ -459,9 +373,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
int ret;
dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
- NFS_I(inode)->fscache, npages, inode);
+ nfs_i_fscache(inode), npages, inode);
- ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
+ ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
mapping, pages, nr_pages,
nfs_readpage_from_fscache_complete,
ctx,
@@ -506,15 +420,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
dfprintk(FSCACHE,
"NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
- NFS_I(inode)->fscache, page, page->index, page->flags, sync);
+ nfs_i_fscache(inode), page, page->index, page->flags, sync);
- ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
dfprintk(FSCACHE,
"NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
page, page->index, page->flags, ret);
if (ret != 0) {
- fscache_uncache_page(NFS_I(inode)->fscache, page);
+ fscache_uncache_page(nfs_i_fscache(inode), page);
nfs_add_fscache_stats(inode,
NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 4ecb76652eb..d7fe3e799f2 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -76,11 +76,9 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *);
extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
-extern void nfs_fscache_init_inode_cookie(struct inode *);
-extern void nfs_fscache_release_inode_cookie(struct inode *);
-extern void nfs_fscache_zap_inode_cookie(struct inode *);
-extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *);
-extern void nfs_fscache_reset_inode_cookie(struct inode *);
+extern void nfs_fscache_init_inode(struct inode *);
+extern void nfs_fscache_clear_inode(struct inode *);
+extern void nfs_fscache_open_file(struct inode *, struct file *);
extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
extern int nfs_fscache_release_page(struct page *, gfp_t);
@@ -187,12 +185,10 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
-static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
-static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
- struct file *filp) {}
-static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}
+static inline void nfs_fscache_init_inode(struct inode *inode) {}
+static inline void nfs_fscache_clear_inode(struct inode *inode) {}
+static inline void nfs_fscache_open_file(struct inode *inode,
+ struct file *filp) {}
static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c3664..b94f80420a5 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -95,7 +95,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
goto out;
}
- inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+ inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL);
if (IS_ERR(inode)) {
dprintk("nfs_get_root: get root inode failed\n");
ret = ERR_CAST(inode);
@@ -120,14 +120,14 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
security_d_instantiate(ret, inode);
spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+ if (IS_ROOT(ret) && !ret->d_fsdata &&
+ !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
ret->d_fsdata = name;
name = NULL;
}
spin_unlock(&ret->d_lock);
out:
- if (name)
- kfree(name);
+ kfree(name);
nfs_free_fattr(fsinfo.fattr);
return ret;
}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index bc3968fa81e..567983d2c0e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -49,6 +49,7 @@
#include "internal.h"
#include "netns.h"
+#include "nfs4trace.h"
#define NFS_UINT_MAXLEN 11
@@ -63,6 +64,7 @@ struct idmap_legacy_upcalldata {
};
struct idmap {
+ struct rpc_pipe_dir_object idmap_pdo;
struct rpc_pipe *idmap_pipe;
struct idmap_legacy_upcalldata *idmap_upcall_data;
struct mutex idmap_mutex;
@@ -97,7 +99,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
{
struct nfs4_string *owner = fattr->owner_name;
- __u32 uid;
+ kuid_t uid;
if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
return false;
@@ -111,7 +113,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr
static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
{
struct nfs4_string *group = fattr->group_name;
- __u32 gid;
+ kgid_t gid;
if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
return false;
@@ -193,7 +195,8 @@ static int nfs_idmap_init_keyring(void)
if (!cred)
return -ENOMEM;
- keyring = keyring_alloc(".id_resolver", 0, 0, cred,
+ keyring = keyring_alloc(".id_resolver",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
(KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -261,29 +264,42 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
return desclen;
}
-static ssize_t nfs_idmap_request_key(struct key_type *key_type,
- const char *name, size_t namelen,
- const char *type, void *data,
- size_t data_size, struct idmap *idmap)
+static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
+ const char *type, struct idmap *idmap)
{
- const struct cred *saved_cred;
- struct key *rkey;
char *desc;
- struct user_key_payload *payload;
+ struct key *rkey;
ssize_t ret;
ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
if (ret <= 0)
- goto out;
+ return ERR_PTR(ret);
+
+ rkey = request_key(&key_type_id_resolver, desc, "");
+ if (IS_ERR(rkey)) {
+ mutex_lock(&idmap->idmap_mutex);
+ rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
+ desc, "", 0, idmap);
+ mutex_unlock(&idmap->idmap_mutex);
+ }
+
+ kfree(desc);
+ return rkey;
+}
+
+static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
+ const char *type, void *data,
+ size_t data_size, struct idmap *idmap)
+{
+ const struct cred *saved_cred;
+ struct key *rkey;
+ struct user_key_payload *payload;
+ ssize_t ret;
saved_cred = override_creds(id_resolver_cache);
- if (idmap)
- rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap);
- else
- rkey = request_key(&key_type_id_resolver, desc, "");
+ rkey = nfs_idmap_request_key(name, namelen, type, idmap);
revert_creds(saved_cred);
- kfree(desc);
if (IS_ERR(rkey)) {
ret = PTR_ERR(rkey);
goto out;
@@ -296,7 +312,7 @@ static ssize_t nfs_idmap_request_key(struct key_type *key_type,
if (ret < 0)
goto out_up;
- payload = rcu_dereference(rkey->payload.data);
+ payload = rcu_dereference(rkey->payload.rcudata);
if (IS_ERR_OR_NULL(payload)) {
ret = PTR_ERR(payload);
goto out_up;
@@ -315,23 +331,6 @@ out:
return ret;
}
-static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
- const char *type, void *data,
- size_t data_size, struct idmap *idmap)
-{
- ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver,
- name, namelen, type, data,
- data_size, NULL);
- if (ret < 0) {
- mutex_lock(&idmap->idmap_mutex);
- ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
- name, namelen, type, data,
- data_size, idmap);
- mutex_unlock(&idmap->idmap_mutex);
- }
- return ret;
-}
-
/* ID -> Name */
static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
size_t buflen, struct idmap *idmap)
@@ -404,16 +403,23 @@ static struct key_type key_type_id_resolver_legacy = {
.request_key = nfs_idmap_legacy_upcall,
};
-static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
+static void nfs_idmap_pipe_destroy(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
- if (pipe->dentry)
+ struct idmap *idmap = pdo->pdo_data;
+ struct rpc_pipe *pipe = idmap->idmap_pipe;
+
+ if (pipe->dentry) {
rpc_unlink(pipe->dentry);
+ pipe->dentry = NULL;
+ }
}
-static int __nfs_idmap_register(struct dentry *dir,
- struct idmap *idmap,
- struct rpc_pipe *pipe)
+static int nfs_idmap_pipe_create(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
+ struct idmap *idmap = pdo->pdo_data;
+ struct rpc_pipe *pipe = idmap->idmap_pipe;
struct dentry *dentry;
dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
@@ -423,36 +429,10 @@ static int __nfs_idmap_register(struct dentry *dir,
return 0;
}
-static void nfs_idmap_unregister(struct nfs_client *clp,
- struct rpc_pipe *pipe)
-{
- struct net *net = clp->cl_net;
- struct super_block *pipefs_sb;
-
- pipefs_sb = rpc_get_sb_net(net);
- if (pipefs_sb) {
- __nfs_idmap_unregister(pipe);
- rpc_put_sb_net(net);
- }
-}
-
-static int nfs_idmap_register(struct nfs_client *clp,
- struct idmap *idmap,
- struct rpc_pipe *pipe)
-{
- struct net *net = clp->cl_net;
- struct super_block *pipefs_sb;
- int err = 0;
-
- pipefs_sb = rpc_get_sb_net(net);
- if (pipefs_sb) {
- if (clp->cl_rpcclient->cl_dentry)
- err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
- idmap, pipe);
- rpc_put_sb_net(net);
- }
- return err;
-}
+static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
+ .create = nfs_idmap_pipe_create,
+ .destroy = nfs_idmap_pipe_destroy,
+};
int
nfs_idmap_new(struct nfs_client *clp)
@@ -465,23 +445,31 @@ nfs_idmap_new(struct nfs_client *clp)
if (idmap == NULL)
return -ENOMEM;
+ rpc_init_pipe_dir_object(&idmap->idmap_pdo,
+ &nfs_idmap_pipe_dir_object_ops,
+ idmap);
+
pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
if (IS_ERR(pipe)) {
error = PTR_ERR(pipe);
- kfree(idmap);
- return error;
- }
- error = nfs_idmap_register(clp, idmap, pipe);
- if (error) {
- rpc_destroy_pipe_data(pipe);
- kfree(idmap);
- return error;
+ goto err;
}
idmap->idmap_pipe = pipe;
mutex_init(&idmap->idmap_mutex);
+ error = rpc_add_pipe_dir_object(clp->cl_net,
+ &clp->cl_rpcclient->cl_pipedir_objects,
+ &idmap->idmap_pdo);
+ if (error)
+ goto err_destroy_pipe;
+
clp->cl_idmap = idmap;
return 0;
+err_destroy_pipe:
+ rpc_destroy_pipe_data(idmap->idmap_pipe);
+err:
+ kfree(idmap);
+ return error;
}
void
@@ -491,130 +479,26 @@ nfs_idmap_delete(struct nfs_client *clp)
if (!idmap)
return;
- nfs_idmap_unregister(clp, idmap->idmap_pipe);
- rpc_destroy_pipe_data(idmap->idmap_pipe);
clp->cl_idmap = NULL;
+ rpc_remove_pipe_dir_object(clp->cl_net,
+ &clp->cl_rpcclient->cl_pipedir_objects,
+ &idmap->idmap_pdo);
+ rpc_destroy_pipe_data(idmap->idmap_pipe);
kfree(idmap);
}
-static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
- struct super_block *sb)
-{
- int err = 0;
-
- switch (event) {
- case RPC_PIPEFS_MOUNT:
- err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
- clp->cl_idmap,
- clp->cl_idmap->idmap_pipe);
- break;
- case RPC_PIPEFS_UMOUNT:
- if (clp->cl_idmap->idmap_pipe) {
- struct dentry *parent;
-
- parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
- __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
- /*
- * Note: This is a dirty hack. SUNRPC hook has been
- * called already but simple_rmdir() call for the
- * directory returned with error because of idmap pipe
- * inside. Thus now we have to remove this directory
- * here.
- */
- if (rpc_rmdir(parent))
- printk(KERN_ERR "NFS: %s: failed to remove "
- "clnt dir!\n", __func__);
- }
- break;
- default:
- printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
- event);
- return -ENOTSUPP;
- }
- return err;
-}
-
-static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
-{
- struct nfs_net *nn = net_generic(net, nfs_net_id);
- struct dentry *cl_dentry;
- struct nfs_client *clp;
- int err;
-
-restart:
- spin_lock(&nn->nfs_client_lock);
- list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- /* Wait for initialisation to finish */
- if (clp->cl_cons_state == NFS_CS_INITING) {
- atomic_inc(&clp->cl_count);
- spin_unlock(&nn->nfs_client_lock);
- err = nfs_wait_client_init_complete(clp);
- nfs_put_client(clp);
- if (err)
- return NULL;
- goto restart;
- }
- /* Skip nfs_clients that failed to initialise */
- if (clp->cl_cons_state < 0)
- continue;
- smp_rmb();
- if (clp->rpc_ops != &nfs_v4_clientops)
- continue;
- cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
- if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
- ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
- continue;
- atomic_inc(&clp->cl_count);
- spin_unlock(&nn->nfs_client_lock);
- return clp;
- }
- spin_unlock(&nn->nfs_client_lock);
- return NULL;
-}
-
-static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
- void *ptr)
-{
- struct super_block *sb = ptr;
- struct nfs_client *clp;
- int error = 0;
-
- if (!try_module_get(THIS_MODULE))
- return 0;
-
- while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
- error = __rpc_pipefs_event(clp, event, sb);
- nfs_put_client(clp);
- if (error)
- break;
- }
- module_put(THIS_MODULE);
- return error;
-}
-
-#define PIPEFS_NFS_PRIO 1
-
-static struct notifier_block nfs_idmap_block = {
- .notifier_call = rpc_pipefs_event,
- .priority = SUNRPC_PIPEFS_NFS_PRIO,
-};
-
int nfs_idmap_init(void)
{
int ret;
ret = nfs_idmap_init_keyring();
if (ret != 0)
goto out;
- ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
- if (ret != 0)
- nfs_idmap_quit_keyring();
out:
return ret;
}
void nfs_idmap_quit(void)
{
- rpc_pipefs_notifier_unregister(&nfs_idmap_block);
nfs_idmap_quit_keyring();
}
@@ -725,9 +609,9 @@ out1:
return ret;
}
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
{
- return key_instantiate_and_link(key, data, strlen(data) + 1,
+ return key_instantiate_and_link(key, data, datalen,
id_resolver_cache->thread_keyring,
authkey);
}
@@ -737,6 +621,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
struct key *key, struct key *authkey)
{
char id_str[NFS_UINT_MAXLEN];
+ size_t len;
int ret = -ENOKEY;
/* ret = -ENOKEY */
@@ -746,13 +631,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
case IDMAP_CONV_NAMETOID:
if (strcmp(upcall->im_name, im->im_name) != 0)
break;
- sprintf(id_str, "%d", im->im_id);
- ret = nfs_idmap_instantiate(key, authkey, id_str);
+ /* Note: here we store the NUL terminator too */
+ len = sprintf(id_str, "%d", im->im_id) + 1;
+ ret = nfs_idmap_instantiate(key, authkey, id_str, len);
break;
case IDMAP_CONV_IDTONAME:
if (upcall->im_id != im->im_id)
break;
- ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+ len = strlen(im->im_name);
+ ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
break;
default:
ret = -EINVAL;
@@ -764,7 +651,7 @@ out:
static ssize_t
idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
- struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
+ struct rpc_inode *rpci = RPC_I(file_inode(filp));
struct idmap *idmap = (struct idmap *)rpci->private;
struct key_construction *cons;
struct idmap_msg im;
@@ -836,43 +723,65 @@ idmap_release_pipe(struct inode *inode)
nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
}
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
+ __u32 id = -1;
+ int ret = 0;
- if (nfs_map_string_to_numeric(name, namelen, uid))
- return 0;
- return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
+ if (!nfs_map_string_to_numeric(name, namelen, &id))
+ ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
+ if (ret == 0) {
+ *uid = make_kuid(&init_user_ns, id);
+ if (!uid_valid(*uid))
+ ret = -ERANGE;
+ }
+ trace_nfs4_map_name_to_uid(name, namelen, id, ret);
+ return ret;
}
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
+ __u32 id = -1;
+ int ret = 0;
- if (nfs_map_string_to_numeric(name, namelen, gid))
- return 0;
- return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
+ if (!nfs_map_string_to_numeric(name, namelen, &id))
+ ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
+ if (ret == 0) {
+ *gid = make_kgid(&init_user_ns, id);
+ if (!gid_valid(*gid))
+ ret = -ERANGE;
+ }
+ trace_nfs4_map_group_to_gid(name, namelen, id, ret);
+ return ret;
}
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
+int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
+ __u32 id;
+ id = from_kuid(&init_user_ns, uid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
+ ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
if (ret < 0)
- ret = nfs_map_numeric_to_string(uid, buf, buflen);
+ ret = nfs_map_numeric_to_string(id, buf, buflen);
+ trace_nfs4_map_uid_to_name(buf, ret, id, ret);
return ret;
}
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
+int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
{
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
+ __u32 id;
+ id = from_kgid(&init_user_ns, gid);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
- ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
+ ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
if (ret < 0)
- ret = nfs_map_numeric_to_string(gid, buf, buflen);
+ ret = nfs_map_numeric_to_string(id, buf, buflen);
+ trace_nfs4_map_gid_to_group(buf, ret, id, ret);
return ret;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0..9927913c97c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,7 +38,6 @@
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/freezer.h>
-#include <linux/crc32.h>
#include <asm/uaccess.h>
@@ -48,11 +47,12 @@
#include "iostat.h"
#include "internal.h"
#include "fscache.h"
-#include "dns_resolve.h"
#include "pnfs.h"
#include "nfs.h"
#include "netns.h"
+#include "nfstrace.h"
+
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
@@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
- freezable_schedule();
+ freezable_schedule_unsafe();
return 0;
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
@@ -122,13 +122,13 @@ void nfs_clear_inode(struct inode *inode)
WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files));
nfs_zap_acl_cache(inode);
nfs_access_zap_cache(inode);
- nfs_fscache_release_inode_cookie(inode);
+ nfs_fscache_clear_inode(inode);
}
EXPORT_SYMBOL_GPL(nfs_clear_inode);
void nfs_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
nfs_clear_inode(inode);
}
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
return ret;
}
+static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
+ nfsi->cache_validity |= flags;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
+}
+
/*
* Invalidate the local caches
*/
@@ -162,11 +173,17 @@ static void nfs_zap_caches_locked(struct inode *inode)
memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
- nfs_fscache_invalidate(inode);
- } else {
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
- }
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_DATA
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_PAGECACHE);
+ } else
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL
+ | NFS_INO_REVAL_PAGECACHE);
+ nfs_zap_label_cache_locked(nfsi);
}
void nfs_zap_caches(struct inode *inode)
@@ -180,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
{
if (mapping->nrpages != 0) {
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
spin_unlock(&inode->i_lock);
}
}
@@ -202,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -237,6 +253,8 @@ nfs_find_actor(struct inode *inode, void *opaque)
if (NFS_FILEID(inode) != fattr->fileid)
return 0;
+ if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode))
+ return 0;
if (nfs_compare_fh(NFS_FH(inode), fh))
return 0;
if (is_bad_inode(inode) || NFS_STALE(inode))
@@ -255,12 +273,74 @@ nfs_init_locked(struct inode *inode, void *opaque)
return 0;
}
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+static void nfs_clear_label_invalid(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL;
+ spin_unlock(&inode->i_lock);
+}
+
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
+{
+ int error;
+
+ if (label == NULL)
+ return;
+
+ if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) {
+ error = security_inode_notifysecctx(inode, label->label,
+ label->len);
+ if (error)
+ printk(KERN_ERR "%s() %s %d "
+ "security_inode_notifysecctx() %d\n",
+ __func__,
+ (char *)label->label,
+ label->len, error);
+ nfs_clear_label_invalid(inode);
+ }
+}
+
+struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
+{
+ struct nfs4_label *label = NULL;
+ int minor_version = server->nfs_client->cl_minorversion;
+
+ if (minor_version < 2)
+ return label;
+
+ if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+ return label;
+
+ label = kzalloc(sizeof(struct nfs4_label), flags);
+ if (label == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ label->label = kzalloc(NFS4_MAXLABELLEN, flags);
+ if (label->label == NULL) {
+ kfree(label);
+ return ERR_PTR(-ENOMEM);
+ }
+ label->len = NFS4_MAXLABELLEN;
+
+ return label;
+}
+EXPORT_SYMBOL_GPL(nfs4_label_alloc);
+#else
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
+{
+}
+#endif
+EXPORT_SYMBOL_GPL(nfs_setsecurity);
+
/*
* This is our front-end to iget that looks up inodes by file handle
* instead of inode number.
*/
struct inode *
-nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
+nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs_find_desc desc = {
.fh = fh,
@@ -298,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_mode = fattr->mode;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
&& nfs_server_capable(inode, NFS_CAP_MODE))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -332,8 +412,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_version = 0;
inode->i_size = 0;
clear_nlink(inode);
- inode->i_uid = -2;
- inode->i_gid = -2;
+ inode->i_uid = make_kuid(&init_user_ns, -2);
+ inode->i_gid = make_kgid(&init_user_ns, -2);
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
nfsi->write_io = 0;
@@ -344,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
else if (nfs_server_capable(inode, NFS_CAP_ATIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
else if (nfs_server_capable(inode, NFS_CAP_MTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
else if (nfs_server_capable(inode, NFS_CAP_CTIME))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode->i_version = fattr->change_attr;
else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
else
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR
- | NFS_INO_REVAL_PAGECACHE;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
+ | NFS_INO_REVAL_PAGECACHE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -382,18 +462,21 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
}
+
+ nfs_setsecurity(inode, fattr, label);
+
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
nfsi->access_cache = RB_ROOT;
- nfs_fscache_init_inode_cookie(inode);
+ nfs_fscache_init_inode(inode);
unlock_new_inode(inode);
} else
nfs_refresh_inode(inode, fattr);
- dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
+ dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
+ (unsigned long long)NFS_FILEID(inode),
nfs_display_fhandle_hash(fh),
atomic_read(&inode->i_count));
@@ -431,6 +514,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
+ trace_nfs_setattr_enter(inode);
+
/* Write all dirty data */
if (S_ISREG(inode->i_mode)) {
nfs_inode_dio_wait(inode);
@@ -447,9 +532,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
NFS_PROTO(inode)->return_delegation(inode);
error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
if (error == 0)
- nfs_refresh_inode(inode, fattr);
+ error = nfs_refresh_inode(inode, fattr);
nfs_free_fattr(fattr);
out:
+ trace_nfs_setattr_exit(inode, error);
return error;
}
EXPORT_SYMBOL_GPL(nfs_setattr);
@@ -465,7 +551,6 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
- loff_t oldsize;
int err;
err = inode_newsize_ok(inode, offset);
@@ -473,11 +558,13 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
goto out;
spin_lock(&inode->i_lock);
- oldsize = inode->i_size;
i_size_write(inode, offset);
+ /* Optimisation */
+ if (offset == 0)
+ NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
- truncate_pagecache(inode, oldsize, offset);
+ truncate_pagecache(inode, offset);
out:
return err;
}
@@ -503,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode->i_uid = attr->ia_uid;
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
+ | NFS_INO_INVALID_ACL);
spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -513,12 +601,32 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
+static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ parent = dget_parent(dentry);
+ nfs_force_use_readdirplus(parent->d_inode);
+ dput(parent);
+}
+
+static bool nfs_need_revalidate_inode(struct inode *inode)
+{
+ if (NFS_I(inode)->cache_validity &
+ (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
+ return true;
+ if (nfs_attribute_cache_expired(inode))
+ return true;
+ return false;
+}
+
int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err;
+ trace_nfs_getattr_enter(inode);
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
nfs_inode_dio_wait(inode);
@@ -540,15 +648,19 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
need_atime = 0;
- if (need_atime)
- err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- else
- err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (need_atime || nfs_need_revalidate_inode(inode)) {
+ struct nfs_server *server = NFS_SERVER(inode);
+
+ if (server->caps & NFS_CAP_READDIRPLUS)
+ nfs_request_parent_use_readdirplus(dentry);
+ err = __nfs_revalidate_inode(server, inode);
+ }
if (!err) {
generic_fillattr(inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
}
out:
+ trace_nfs_getattr_exit(inode, err);
return err;
}
EXPORT_SYMBOL_GPL(nfs_getattr);
@@ -559,20 +671,22 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
l_ctx->lockowner.l_owner = current->files;
l_ctx->lockowner.l_pid = current->tgid;
INIT_LIST_HEAD(&l_ctx->list);
+ nfs_iocounter_init(&l_ctx->io_count);
}
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
{
- struct nfs_lock_context *pos;
+ struct nfs_lock_context *head = &ctx->lock_context;
+ struct nfs_lock_context *pos = head;
- list_for_each_entry(pos, &ctx->lock_context.list, list) {
+ do {
if (pos->lockowner.l_owner != current->files)
continue;
if (pos->lockowner.l_pid != current->tgid)
continue;
atomic_inc(&pos->count);
return pos;
- }
+ } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
return NULL;
}
@@ -694,10 +808,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
dput(ctx->dentry);
- if (is_sync)
- nfs_sb_deactive(sb);
- else
- nfs_sb_deactive_async(sb);
+ nfs_sb_deactive(sb);
kfree(ctx->mdsthreshold);
kfree(ctx);
}
@@ -712,16 +823,23 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
*/
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
{
- struct inode *inode = filp->f_path.dentry->d_inode;
+ struct inode *inode = ctx->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
- filp->private_data = get_nfs_open_context(ctx);
spin_lock(&inode->i_lock);
list_add(&ctx->list, &nfsi->open_files);
spin_unlock(&inode->i_lock);
}
+EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
+
+void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+{
+ filp->private_data = get_nfs_open_context(ctx);
+ if (list_empty(&ctx->list))
+ nfs_inode_attach_open_context(ctx);
+}
EXPORT_SYMBOL_GPL(nfs_file_set_open_context);
/*
@@ -747,10 +865,11 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
static void nfs_file_clear_open_context(struct file *filp)
{
- struct inode *inode = filp->f_path.dentry->d_inode;
struct nfs_open_context *ctx = nfs_file_open_context(filp);
if (ctx) {
+ struct inode *inode = ctx->dentry->d_inode;
+
filp->private_data = NULL;
spin_lock(&inode->i_lock);
list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
@@ -771,7 +890,7 @@ int nfs_open(struct inode *inode, struct file *filp)
return PTR_ERR(ctx);
nfs_file_set_open_context(filp, ctx);
put_nfs_open_context(ctx);
- nfs_fscache_set_inode_cookie(inode, filp);
+ nfs_fscache_open_file(inode, filp);
return 0;
}
@@ -789,11 +908,14 @@ int
__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
int status = -ESTALE;
+ struct nfs4_label *label = NULL;
struct nfs_fattr *fattr = NULL;
struct nfs_inode *nfsi = NFS_I(inode);
- dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
- inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+ dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n",
+ inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode));
+
+ trace_nfs_revalidate_inode_enter(inode);
if (is_bad_inode(inode))
goto out;
@@ -806,36 +928,48 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
goto out;
nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
- status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
+
+ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+ if (IS_ERR(label)) {
+ status = PTR_ERR(label);
+ goto out;
+ }
+
+ status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label);
if (status != 0) {
- dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode), status);
+ (unsigned long long)NFS_FILEID(inode), status);
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
}
- goto out;
+ goto err_out;
}
status = nfs_refresh_inode(inode, fattr);
if (status) {
- dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode), status);
- goto out;
+ (unsigned long long)NFS_FILEID(inode), status);
+ goto err_out;
}
if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
nfs_zap_acl_cache(inode);
- dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
+ nfs_setsecurity(inode, fattr, label);
+
+ dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode));
+ (unsigned long long)NFS_FILEID(inode));
- out:
+err_out:
+ nfs4_label_free(label);
+out:
nfs_free_fattr(fattr);
+ trace_nfs_revalidate_inode_exit(inode, status);
return status;
}
@@ -846,7 +980,7 @@ int nfs_attribute_timeout(struct inode *inode)
return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
}
-static int nfs_attribute_cache_expired(struct inode *inode)
+int nfs_attribute_cache_expired(struct inode *inode)
{
if (nfs_have_delegated_attributes(inode))
return 0;
@@ -862,8 +996,7 @@ static int nfs_attribute_cache_expired(struct inode *inode)
*/
int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
- if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
- && !nfs_attribute_cache_expired(inode))
+ if (!nfs_need_revalidate_inode(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
return __nfs_revalidate_inode(server, inode);
}
@@ -872,21 +1005,29 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
-
+ int ret;
+
if (mapping->nrpages != 0) {
- int ret = invalidate_inode_pages2(mapping);
+ if (S_ISREG(inode->i_mode)) {
+ ret = nfs_sync_mapping(mapping);
+ if (ret < 0)
+ return ret;
+ }
+ ret = invalidate_inode_pages2(mapping);
if (ret < 0)
return ret;
}
- spin_lock(&inode->i_lock);
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode)) {
+ spin_lock(&inode->i_lock);
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode->i_lock);
+ }
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
nfs_fscache_wait_on_invalidate(inode);
- dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
- inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+
+ dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n",
+ inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(inode));
return 0;
}
@@ -907,6 +1048,7 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
int ret = 0;
/* swapfiles are not supposed to be shared. */
@@ -918,8 +1060,46 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
if (ret < 0)
goto out;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- ret = nfs_invalidate_mapping(inode, mapping);
+
+ /*
+ * We must clear NFS_INO_INVALID_DATA first to ensure that
+ * invalidations that come in while we're shooting down the mappings
+ * are respected. But, that leaves a race window where one revalidator
+ * can clear the flag, and then another checks it before the mapping
+ * gets invalidated. Fix that by serializing access to this part of
+ * the function.
+ *
+ * At the same time, we need to allow other tasks to see whether we
+ * might be in the middle of invalidating the pages, so we only set
+ * the bit lock here if it looks like we're going to be doing that.
+ */
+ for (;;) {
+ ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+ if (ret)
+ goto out;
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ break;
+ spin_unlock(&inode->i_lock);
+ goto out;
+ }
+
+ set_bit(NFS_INO_INVALIDATING, bitlock);
+ smp_wmb();
+ nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
+ spin_unlock(&inode->i_lock);
+ trace_nfs_invalidate_mapping_enter(inode);
+ ret = nfs_invalidate_mapping(inode, mapping);
+ trace_nfs_invalidate_mapping_exit(inode, ret);
+
+ clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
+ smp_mb__after_atomic();
+ wake_up_bit(bitlock, NFS_INO_INVALIDATING);
out:
return ret;
}
@@ -934,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& inode->i_version == fattr->pre_change_attr) {
inode->i_version = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
/* If we have atomic WCC data, we may update some attributes */
@@ -950,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
if (S_ISDIR(inode->i_mode))
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
ret |= NFS_INO_INVALID_ATTR;
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -961,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
ret |= NFS_INO_INVALID_ATTR;
}
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
-
return ret;
}
@@ -1009,9 +1186,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
/* Have any file permissions changed? */
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
+ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
+ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
/* Has the link count changed? */
@@ -1022,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
- nfsi->cache_validity |= invalid;
+ nfs_set_cache_invalid(inode, invalid);
nfsi->read_cache_jiffies = fattr->time_start;
return 0;
@@ -1099,8 +1276,9 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh)
{
/* wireshark uses 32-bit AUTODIN crc and does a bitwise
* not on the result */
- return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size);
+ return nfs_fhandle_hash(fh);
}
+EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash);
/*
* _nfs_display_fhandle - display an NFS file handle on the console
@@ -1145,6 +1323,7 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption)
}
}
}
+EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
#endif
/**
@@ -1176,11 +1355,35 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
+/*
+ * Don't trust the change_attribute, mtime, ctime or size if
+ * a pnfs LAYOUTCOMMIT is outstanding
+ */
+static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode,
+ struct nfs_fattr *fattr)
+{
+ if (pnfs_layoutcommit_outstanding(inode))
+ fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE |
+ NFS_ATTR_FATTR_MTIME |
+ NFS_ATTR_FATTR_CTIME |
+ NFS_ATTR_FATTR_SIZE);
+}
+
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
+ int ret;
+
+ trace_nfs_refresh_inode_enter(inode);
+
+ nfs_inode_attrs_handle_layoutcommit(inode, fattr);
+
if (nfs_inode_attrs_need_update(inode, fattr))
- return nfs_update_inode(inode, fattr);
- return nfs_check_inode_attributes(inode, fattr);
+ ret = nfs_update_inode(inode, fattr);
+ else
+ ret = nfs_check_inode_attributes(inode, fattr);
+
+ trace_nfs_refresh_inode_exit(inode, ret);
+ return ret;
}
/**
@@ -1209,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
- if (S_ISDIR(inode->i_mode)) {
- nfsi->cache_validity |= NFS_INO_INVALID_DATA;
- nfs_fscache_invalidate(inode);
- }
+ if (S_ISDIR(inode->i_mode))
+ invalid |= NFS_INO_INVALID_DATA;
+ nfs_set_cache_invalid(inode, invalid);
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
return nfs_refresh_inode_locked(inode, fattr);
@@ -1242,6 +1443,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
spin_lock(&inode->i_lock);
status = nfs_post_op_update_inode_locked(inode, fattr);
spin_unlock(&inode->i_lock);
+
return status;
}
EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
@@ -1319,7 +1521,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
unsigned long now = jiffies;
unsigned long save_cache_validity;
- dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n",
+ dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
nfs_display_fhandle_hash(NFS_FH(inode)),
atomic_read(&inode->i_count), fattr->valid);
@@ -1340,7 +1542,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/*
* Big trouble! The inode has become a different object.
*/
- printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n",
+ printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n",
__func__, inode->i_ino, inode->i_mode, fattr->mode);
goto out_err;
}
@@ -1381,18 +1583,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
- invalid |= save_cache_validity;
+ nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1402,10 +1606,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
- if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
- new_isize > cur_isize) {
+ if ((nfsi->npages == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ invalid &= ~NFS_INO_REVAL_PAGECACHE;
}
dprintk("NFS: isize change on server for file %s/%ld "
"(%Ld to %Ld)\n",
@@ -1415,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)new_isize);
}
} else
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
@@ -1423,7 +1628,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1434,29 +1640,32 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
}
} else if (server->caps & NFS_CAP_MODE)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
- if (inode->i_uid != fattr->uid) {
+ if (!uid_eq(inode->i_uid, fattr->uid)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_uid = fattr->uid;
}
} else if (server->caps & NFS_CAP_OWNER)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
- if (inode->i_gid != fattr->gid) {
+ if (!gid_eq(inode->i_gid, fattr->gid)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_gid = fattr->gid;
}
} else if (server->caps & NFS_CAP_OWNER_GROUP)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1469,7 +1678,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1501,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid &= ~NFS_INO_INVALID_DATA;
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
(save_cache_validity & NFS_INO_REVAL_FORCED))
- nfsi->cache_validity |= invalid;
-
- if (invalid & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
+ nfs_set_cache_invalid(inode, invalid);
return 0;
out_err:
@@ -1525,10 +1732,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return NULL;
nfsi->flags = 0UL;
nfsi->cache_validity = 0UL;
-#ifdef CONFIG_NFS_V3_ACL
- nfsi->acl_access = ERR_PTR(-EAGAIN);
- nfsi->acl_default = ERR_PTR(-EAGAIN);
-#endif
#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
@@ -1637,12 +1840,11 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
nfs_clients_init(net);
- return nfs_dns_resolver_cache_init(net);
+ return 0;
}
static void nfs_net_exit(struct net *net)
{
- nfs_dns_resolver_cache_destroy(net);
nfs_cleanup_cb_ident_idr(net);
}
@@ -1660,10 +1862,6 @@ static int __init init_nfs_fs(void)
{
int err;
- err = nfs_dns_resolver_init();
- if (err < 0)
- goto out10;;
-
err = register_pernet_subsys(&nfs_net_ops);
if (err < 0)
goto out9;
@@ -1729,8 +1927,6 @@ out7:
out8:
unregister_pernet_subsys(&nfs_net_ops);
out9:
- nfs_dns_resolver_destroy();
-out10:
return err;
}
@@ -1743,7 +1939,6 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_nfspagecache();
nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
- nfs_dns_resolver_destroy();
#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a0..f415cbf9f6c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,6 +5,7 @@
#include "nfs4_fs.h"
#include <linux/mount.h>
#include <linux/security.h>
+#include <linux/crc32.h>
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
@@ -87,8 +88,8 @@ struct nfs_parsed_mount_data {
unsigned int namlen;
unsigned int options;
unsigned int bsize;
- unsigned int auth_flavor_len;
- rpc_authflavor_t auth_flavors[1];
+ struct nfs_auth_info auth_info;
+ rpc_authflavor_t selected_flavor;
char *client_address;
unsigned int version;
unsigned int minorversion;
@@ -153,6 +154,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
rpc_authflavor_t);
int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
void nfs_server_insert_lists(struct nfs_server *);
+void nfs_server_remove_lists(struct nfs_server *);
void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
rpc_authflavor_t);
@@ -165,7 +167,7 @@ extern void nfs_free_client(struct nfs_client *);
extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
- struct nfs4_sessionid *);
+ struct nfs4_sessionid *, u32);
extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_server(
@@ -173,6 +175,9 @@ extern struct nfs_server *nfs4_create_server(
struct nfs_subversion *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
+extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
+ struct sockaddr *sap, size_t salen,
+ struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
@@ -185,6 +190,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
int ds_addrlen, int ds_proto,
unsigned int ds_timeo,
unsigned int ds_retrans);
+extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
+ struct inode *);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
@@ -224,11 +231,26 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
+int nfs_iocounter_wait(struct nfs_io_counter *c);
+
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+ const struct rpc_call_ops *, int, int);
+void nfs_free_request(struct nfs_page *req);
+
+static inline void nfs_iocounter_init(struct nfs_io_counter *c)
+{
+ c->flags = 0;
+ atomic_set(&c->io_count, 0);
+}
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
@@ -248,6 +270,7 @@ extern int nfs4_decode_dirent(struct xdr_stream *,
#ifdef CONFIG_NFS_V4_1
extern const u32 nfs41_maxread_overhead;
extern const u32 nfs41_maxwrite_overhead;
+extern const u32 nfs41_maxgetdevinfo_overhead;
#endif
/* nfs4proc.c */
@@ -255,15 +278,42 @@ extern const u32 nfs41_maxwrite_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags);
+static inline void nfs4_label_free(struct nfs4_label *label)
+{
+ if (label) {
+ kfree(label->label);
+ kfree(label);
+ }
+ return;
+}
+
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+ if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL))
+ nfsi->cache_validity |= NFS_INO_INVALID_LABEL;
+}
+#else
+static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
+static inline void nfs4_label_free(void *label) {}
+static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
+{
+}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr, rpc_authflavor_t authflavour);
+ const char *ip_addr);
/* dir.c */
-extern int nfs_access_cache_shrinker(struct shrinker *shrink,
- struct shrink_control *sc);
+extern void nfs_force_use_readdirplus(struct inode *dir);
+extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc);
+extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
int nfs_create(struct inode *, struct dentry *, umode_t, bool);
int nfs_mkdir(struct inode *, struct dentry *, umode_t);
@@ -278,16 +328,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
loff_t nfs_file_llseek(struct file *, loff_t, int);
int nfs_file_flush(struct file *, fl_owner_t);
-ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
size_t, unsigned int);
int nfs_file_mmap(struct file *, struct vm_area_struct *);
-ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
int nfs_file_release(struct inode *, struct file *);
int nfs_lock(struct file *, int, struct file_lock *);
int nfs_flock(struct file *, int, struct file_lock *);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
- size_t, unsigned int);
int nfs_check_flags(int);
int nfs_setlease(struct file *, long, struct file_lock **);
@@ -310,6 +358,7 @@ extern struct file_system_type nfs_xdev_fs_type;
extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
+bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
void nfs_initialise_sb(struct super_block *);
@@ -329,7 +378,6 @@ extern int __init register_nfs_fs(void);
extern void __exit unregister_nfs_fs(void);
extern void nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
-extern void nfs_sb_deactive_async(struct super_block *sb);
/* namespace.c */
#define NFS_PATH_CANONICAL 1
@@ -348,24 +396,16 @@ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
const char *);
-extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
+extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
#endif
struct nfs_pgio_completion_ops;
/* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* super.c */
void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -380,19 +420,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -405,6 +436,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
struct nfs_commit_info *cinfo);
int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max);
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
int nfs_scan_commit(struct inode *inode, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_mark_request_commit(struct nfs_page *req,
@@ -423,6 +455,8 @@ void nfs_request_remove_commit_list(struct nfs_page *req,
void nfs_init_cinfo(struct nfs_commit_info *cinfo,
struct inode *inode,
struct nfs_direct_req *dreq);
+int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
+bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
@@ -431,6 +465,13 @@ extern int nfs_migrate_page(struct address_space *,
#define nfs_migrate_page NULL
#endif
+/* unlink.c */
+extern struct rpc_task *
+nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
+ struct dentry *old_dentry, struct dentry *new_dentry,
+ void (*complete)(struct rpc_task *, struct nfs_renamedata *));
+extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
@@ -441,11 +482,10 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour);
+ const char *ip_addr);
extern int nfs40_walk_client_list(struct nfs_client *clp,
struct nfs_client **result,
struct rpc_cred *cred);
@@ -568,3 +608,22 @@ u64 nfs_timespec_to_change_attr(const struct timespec *ts)
{
return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
}
+
+#ifdef CONFIG_CRC32
+/**
+ * nfs_fhandle_hash - calculate the crc32 hash for the filehandle
+ * @fh - pointer to filehandle
+ *
+ * returns a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+ return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
+}
+#else
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+ return 0;
+}
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 91a6faf811a..99a45283b9e 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -139,7 +139,10 @@ struct mnt_fhstatus {
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
*
- * Uses default timeout parameters specified by underlying transport.
+ * Uses default timeout parameters specified by underlying transport. On
+ * successful return, the auth_flavs list and auth_flav_len will be populated
+ * with the list from the server or a faked-up list if the server didn't
+ * provide one.
*/
int nfs_mount(struct nfs_mount_request *info)
{
@@ -195,6 +198,15 @@ int nfs_mount(struct nfs_mount_request *info)
dprintk("NFS: MNT request succeeded\n");
status = 0;
+ /*
+ * If the server didn't provide a flavor list, allow the
+ * client to try any flavor.
+ */
+ if (info->version != NFS_MNT3_VERSION || *info->auth_flav_len == 0) {
+ dprintk("NFS: Faking up auth_flavs list\n");
+ info->auth_flavs[0] = RPC_AUTH_NULL;
+ *info->auth_flav_len = 1;
+ }
out:
return status;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65..b5a0afc3ee1 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
return mnt;
}
+static int
+nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_getattr(mnt, dentry, stat);
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+}
+
+static int
+nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_setattr(dentry, attr);
+ return -EACCES;
+}
+
const struct inode_operations nfs_mountpoint_inode_operations = {
.getattr = nfs_getattr,
+ .setattr = nfs_setattr,
};
const struct inode_operations nfs_referral_inode_operations = {
+ .getattr = nfs_namespace_getattr,
+ .setattr = nfs_namespace_setattr,
};
static void nfs_expire_automounts(struct work_struct *work)
@@ -233,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
dprintk("--> nfs_do_submount()\n");
- dprintk("%s: submounting on %s/%s\n", __func__,
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dprintk("%s: submounting on %pd2\n", __func__,
+ dentry);
if (page == NULL)
goto out;
devname = nfs_devname(dentry, page, PAGE_SIZE);
@@ -260,7 +279,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
struct dentry *parent = dget_parent(dentry);
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr);
+ err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL);
dput(parent);
if (err != 0)
return ERR_PTR(err);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 06b9df49f7f..5f61b83f4a1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
/*
* typedef opaque nfsdata<>;
*/
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
{
u32 recvd, count;
__be32 *p;
@@ -290,8 +290,13 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = be32_to_cpup(p++);
fattr->nlink = be32_to_cpup(p++);
- fattr->uid = be32_to_cpup(p++);
- fattr->gid = be32_to_cpup(p++);
+ fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+ if (!uid_valid(fattr->uid))
+ goto out_uid;
+ fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+ if (!gid_valid(fattr->gid))
+ goto out_gid;
+
fattr->size = be32_to_cpup(p++);
fattr->du.nfs2.blocksize = be32_to_cpup(p++);
@@ -313,6 +318,12 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
return 0;
+out_uid:
+ dprintk("NFS: returned invalid uid\n");
+ return -EINVAL;
+out_gid:
+ dprintk("NFS: returned invalid gid\n");
+ return -EINVAL;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
@@ -351,11 +362,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_UID)
- *p++ = cpu_to_be32(attr->ia_uid);
+ *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_GID)
- *p++ = cpu_to_be32(attr->ia_gid);
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
if (attr->ia_valid & ATTR_SIZE)
@@ -602,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
* };
*/
static void encode_readargs(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -618,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_readargs(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -638,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
* };
*/
static void encode_writeargs(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -658,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_writeargs(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -846,7 +857,7 @@ out_default:
* };
*/
static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -867,7 +878,7 @@ out_default:
}
static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
/* All NFSv2 writes are "file sync" writes */
result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 4a1aafba6a2..8f854dde415 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -10,179 +10,7 @@
#define NFSDBG_FACILITY NFSDBG_PROC
-ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int pos=0, len=0;
-
-# define output(s) do { \
- if (pos + sizeof(s) <= size) { \
- memcpy(buffer + pos, s, sizeof(s)); \
- pos += sizeof(s); \
- } \
- len += sizeof(s); \
- } while(0)
-
- acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- output("system.posix_acl_access");
- posix_acl_release(acl);
- }
-
- if (S_ISDIR(inode->i_mode)) {
- acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- output("system.posix_acl_default");
- posix_acl_release(acl);
- }
- }
-
-# undef output
-
- if (!buffer || len <= size)
- return len;
- return -ERANGE;
-}
-
-ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int type, error = 0;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- acl = nfs3_proc_getacl(inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- else if (acl) {
- if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
- error = -ENODATA;
- else
- error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- posix_acl_release(acl);
- } else
- error = -ENODATA;
-
- return error;
-}
-
-int nfs3_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl;
- int type, error;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- error = nfs3_proc_setacl(inode, type, acl);
- posix_acl_release(acl);
-
- return error;
-}
-
-int nfs3_removexattr(struct dentry *dentry, const char *name)
-{
- struct inode *inode = dentry->d_inode;
- int type;
-
- if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
- type = ACL_TYPE_ACCESS;
- else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
- type = ACL_TYPE_DEFAULT;
- else
- return -EOPNOTSUPP;
-
- return nfs3_proc_setacl(inode, type, NULL);
-}
-
-static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi)
-{
- if (!IS_ERR(nfsi->acl_access)) {
- posix_acl_release(nfsi->acl_access);
- nfsi->acl_access = ERR_PTR(-EAGAIN);
- }
- if (!IS_ERR(nfsi->acl_default)) {
- posix_acl_release(nfsi->acl_default);
- nfsi->acl_default = ERR_PTR(-EAGAIN);
- }
-}
-
-void nfs3_forget_cached_acls(struct inode *inode)
-{
- dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id,
- inode->i_ino);
- spin_lock(&inode->i_lock);
- __nfs3_forget_cached_acls(NFS_I(inode));
- spin_unlock(&inode->i_lock);
-}
-
-static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- struct posix_acl *acl = ERR_PTR(-EINVAL);
-
- spin_lock(&inode->i_lock);
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = nfsi->acl_access;
- break;
-
- case ACL_TYPE_DEFAULT:
- acl = nfsi->acl_default;
- break;
-
- default:
- goto out;
- }
- if (IS_ERR(acl))
- acl = ERR_PTR(-EAGAIN);
- else
- acl = posix_acl_dup(acl);
-out:
- spin_unlock(&inode->i_lock);
- dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id,
- inode->i_ino, type, acl);
- return acl;
-}
-
-static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
- struct posix_acl *dfacl)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
-
- dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id,
- inode->i_ino, acl, dfacl);
- spin_lock(&inode->i_lock);
- __nfs3_forget_cached_acls(NFS_I(inode));
- if (!IS_ERR(acl))
- nfsi->acl_access = posix_acl_dup(acl);
- if (!IS_ERR(dfacl))
- nfsi->acl_default = posix_acl_dup(dfacl);
- spin_unlock(&inode->i_lock);
-}
-
-struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct page *pages[NFSACL_MAXPAGES] = { };
@@ -198,7 +26,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
.rpc_argp = &args,
.rpc_resp = &res,
};
- struct posix_acl *acl;
int status, count;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -207,10 +34,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
status = nfs_revalidate_inode(server, inode);
if (status < 0)
return ERR_PTR(status);
- acl = nfs3_get_cached_acl(inode, type);
- if (acl != ERR_PTR(-EAGAIN))
- return acl;
- acl = NULL;
/*
* Only get the access acl when explicitly requested: We don't
@@ -257,40 +80,41 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
}
if (res.acl_access != NULL) {
- if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) {
+ if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) ||
+ res.acl_access->a_count == 0) {
posix_acl_release(res.acl_access);
res.acl_access = NULL;
}
}
- nfs3_cache_acls(inode,
- (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL),
- (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL));
- switch(type) {
- case ACL_TYPE_ACCESS:
- acl = res.acl_access;
- res.acl_access = NULL;
- break;
+ if (res.mask & NFS_ACL)
+ set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access);
+ else
+ forget_cached_acl(inode, ACL_TYPE_ACCESS);
- case ACL_TYPE_DEFAULT:
- acl = res.acl_default;
- res.acl_default = NULL;
+ if (res.mask & NFS_DFACL)
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default);
+ else
+ forget_cached_acl(inode, ACL_TYPE_DEFAULT);
+
+ nfs_free_fattr(res.fattr);
+ if (type == ACL_TYPE_ACCESS) {
+ posix_acl_release(res.acl_default);
+ return res.acl_access;
+ } else {
+ posix_acl_release(res.acl_access);
+ return res.acl_default;
}
getout:
posix_acl_release(res.acl_access);
posix_acl_release(res.acl_default);
nfs_free_fattr(res.fattr);
-
- if (status != 0) {
- posix_acl_release(acl);
- acl = ERR_PTR(status);
- }
- return acl;
+ return ERR_PTR(status);
}
-static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
- struct posix_acl *dfacl)
+static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+ struct posix_acl *dfacl)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_fattr *fattr;
@@ -353,7 +177,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
switch (status) {
case 0:
status = nfs_refresh_inode(inode, fattr);
- nfs3_cache_acls(inode, acl, dfacl);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl);
break;
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
@@ -373,40 +198,43 @@ out:
return status;
}
-int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl)
+int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+ struct posix_acl *dfacl)
+{
+ int ret;
+ ret = __nfs3_proc_setacls(inode, acl, dfacl);
+ return (ret == -EOPNOTSUPP) ? 0 : ret;
+
+}
+
+int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
struct posix_acl *alloc = NULL, *dfacl = NULL;
int status;
if (S_ISDIR(inode->i_mode)) {
switch(type) {
- case ACL_TYPE_ACCESS:
- alloc = dfacl = nfs3_proc_getacl(inode,
- ACL_TYPE_DEFAULT);
- if (IS_ERR(alloc))
- goto fail;
- break;
-
- case ACL_TYPE_DEFAULT:
- dfacl = acl;
- alloc = acl = nfs3_proc_getacl(inode,
- ACL_TYPE_ACCESS);
- if (IS_ERR(alloc))
- goto fail;
- break;
-
- default:
- return -EINVAL;
+ case ACL_TYPE_ACCESS:
+ alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ if (IS_ERR(alloc))
+ goto fail;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ dfacl = acl;
+ alloc = acl = get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(alloc))
+ goto fail;
+ break;
}
- } else if (type != ACL_TYPE_ACCESS)
- return -EINVAL;
+ }
if (acl == NULL) {
alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(alloc))
goto fail;
}
- status = nfs3_proc_setacls(inode, acl, dfacl);
+ status = __nfs3_proc_setacls(inode, acl, dfacl);
posix_acl_release(alloc);
return status;
@@ -414,27 +242,51 @@ fail:
return PTR_ERR(alloc);
}
-int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
- umode_t mode)
+const struct xattr_handler *nfs3_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ NULL,
+};
+
+static int
+nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
+ size_t size, ssize_t *result)
{
- struct posix_acl *dfacl, *acl;
- int error = 0;
+ struct posix_acl *acl;
+ char *p = data + *result;
- dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(dfacl)) {
- error = PTR_ERR(dfacl);
- return (error == -EOPNOTSUPP) ? 0 : error;
- }
- if (!dfacl)
+ acl = get_acl(inode, type);
+ if (!acl)
return 0;
- acl = posix_acl_dup(dfacl);
- error = posix_acl_create(&acl, GFP_KERNEL, &mode);
- if (error < 0)
- goto out_release_dfacl;
- error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ?
- dfacl : NULL);
+
posix_acl_release(acl);
-out_release_dfacl:
- posix_acl_release(dfacl);
- return error;
+
+ *result += strlen(name);
+ *result += 1;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+
+ssize_t
+nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ ssize_t result = 0;
+ int error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
+ POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ if (error)
+ return error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
+ POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ if (error)
+ return error;
+ return result;
}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 70efb63b1e4..f0afa291fd5 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -18,6 +18,7 @@
#include <linux/lockd/bind.h>
#include <linux/nfs_mount.h>
#include <linux/freezer.h>
+#include <linux/xattr.h>
#include "iostat.h"
#include "internal.h"
@@ -33,7 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+ freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
@@ -98,7 +99,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
*/
static int
nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct rpc_message msg = {
.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
@@ -143,7 +144,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
static int
nfs3_proc_lookup(struct inode *dir, struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
@@ -300,7 +302,7 @@ static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_
status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
nfs_post_op_update_inode(dir, data->res.dir_attr);
if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
return status;
}
@@ -316,11 +318,11 @@ static int
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call create %s\n", dentry->d_name.name);
+ dprintk("NFS call create %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
@@ -335,11 +337,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) {
data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE;
- data->arg.create.verifier[0] = jiffies;
- data->arg.create.verifier[1] = current->pid;
+ data->arg.create.verifier[0] = cpu_to_be32(jiffies);
+ data->arg.create.verifier[1] = cpu_to_be32(current->pid);
}
- sattr->ia_mode &= ~current_umask();
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
for (;;) {
status = nfs3_do_create(dir, dentry, data);
@@ -365,7 +369,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
}
if (status != 0)
- goto out;
+ goto out_release_acls;
/* When we created the file with exclusive semantics, make
* sure we set the attributes afterwards. */
@@ -384,9 +388,14 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
dprintk("NFS reply setattr (post-create): %d\n", status);
if (status != 0)
- goto out;
+ goto out_release_acls;
}
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply create: %d\n", status);
@@ -470,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
}
static int
-nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .old_name = old_name,
- .new_dir = NFS_FH(new_dir),
- .new_name = new_name,
- };
- struct nfs_renameres res;
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
- .rpc_argp = &arg,
- .rpc_resp = &res,
- };
- int status = -ENOMEM;
-
- dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
-
- res.old_fattr = nfs_alloc_fattr();
- res.new_fattr = nfs_alloc_fattr();
- if (res.old_fattr == NULL || res.new_fattr == NULL)
- goto out;
-
- status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
- nfs_post_op_update_inode(old_dir, res.old_fattr);
- nfs_post_op_update_inode(new_dir, res.new_fattr);
-out:
- nfs_free_fattr(res.old_fattr);
- nfs_free_fattr(res.new_fattr);
- dprintk("NFS reply rename: %d\n", status);
- return status;
-}
-
-static int
nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs3_linkargs arg = {
@@ -547,7 +521,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
if (len > NFS3_MAXPATHLEN)
return -ENAMETOOLONG;
- dprintk("NFS call symlink %s\n", dentry->d_name.name);
+ dprintk("NFS call symlink %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
@@ -571,18 +545,20 @@ out:
static int
nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call mkdir %s\n", dentry->d_name.name);
-
- sattr->ia_mode &= ~current_umask();
+ dprintk("NFS call mkdir %pd\n", dentry);
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
+
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR];
data->arg.mkdir.fh = NFS_FH(dir);
data->arg.mkdir.name = dentry->d_name.name;
@@ -591,9 +567,13 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
- goto out;
+ goto out_release_acls;
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mkdir: %d\n", status);
@@ -690,19 +670,21 @@ static int
nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
dev_t rdev)
{
+ struct posix_acl *default_acl, *acl;
struct nfs3_createdata *data;
- umode_t mode = sattr->ia_mode;
int status = -ENOMEM;
- dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
+ dprintk("NFS call mknod %pd %u:%u\n", dentry,
MAJOR(rdev), MINOR(rdev));
- sattr->ia_mode &= ~current_umask();
-
data = nfs3_alloc_createdata();
if (data == NULL)
goto out;
+ status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl);
+ if (status)
+ goto out;
+
data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD];
data->arg.mknod.fh = NFS_FH(dir);
data->arg.mknod.name = dentry->d_name.name;
@@ -730,8 +712,13 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = nfs3_do_create(dir, dentry, data);
if (status != 0)
- goto out;
- status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode);
+ goto out_release_acls;
+
+ status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+
+out_release_acls:
+ posix_acl_release(acl);
+ posix_acl_release(default_acl);
out:
nfs3_free_createdata(data);
dprintk("NFS reply mknod: %d\n", status);
@@ -808,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -820,17 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
-static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
+ return 0;
}
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -841,16 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
-static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
-}
-
static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
rpc_call_start(task);
@@ -872,7 +855,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- struct inode *inode = filp->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(filp);
return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
@@ -901,20 +884,28 @@ static const struct inode_operations nfs3_dir_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .removexattr = generic_removexattr,
+ .get_acl = nfs3_get_acl,
+ .set_acl = nfs3_set_acl,
+#endif
};
static const struct inode_operations nfs3_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
.listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .removexattr = generic_removexattr,
+ .get_acl = nfs3_get_acl,
+ .set_acl = nfs3_set_acl,
+#endif
};
const struct nfs_rpc_ops nfs_v3_clientops = {
@@ -936,7 +927,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.unlink_setup = nfs3_proc_unlink_setup,
.unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
.unlink_done = nfs3_proc_unlink_done,
- .rename = nfs3_proc_rename,
.rename_setup = nfs3_proc_rename_setup,
.rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
.rename_done = nfs3_proc_rename_done,
@@ -950,19 +940,16 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.fsinfo = nfs3_proc_fsinfo,
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
+ .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
.read_setup = nfs3_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
- .clear_acl_cache = nfs3_forget_cached_acls,
+ .clear_acl_cache = forget_all_cached_acls,
.close_context = nfs_close_context,
.have_delegation = nfs3_have_delegation,
.return_delegation = nfs3_return_delegation,
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
index cc471c72523..d6a98949af1 100644
--- a/fs/nfs/nfs3super.c
+++ b/fs/nfs/nfs3super.c
@@ -12,6 +12,9 @@ static struct nfs_subversion nfs_v3 = {
.rpc_vers = &nfs_version3,
.rpc_ops = &nfs_v3_clientops,
.sops = &nfs_sops,
+#ifdef CONFIG_NFS_V3_ACL
+ .xattr = nfs3_xattr_handlers,
+#endif
};
static int __init init_nfs_v3(void)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index bffc32406fb..8f4cbe7f4aa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -592,13 +592,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
if (attr->ia_valid & ATTR_UID) {
*p++ = xdr_one;
- *p++ = cpu_to_be32(attr->ia_uid);
+ *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
} else
*p++ = xdr_zero;
if (attr->ia_valid & ATTR_GID) {
*p++ = xdr_one;
- *p++ = cpu_to_be32(attr->ia_gid);
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
} else
*p++ = xdr_zero;
@@ -657,8 +657,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
fattr->nlink = be32_to_cpup(p++);
- fattr->uid = be32_to_cpup(p++);
- fattr->gid = be32_to_cpup(p++);
+ fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+ if (!uid_valid(fattr->uid))
+ goto out_uid;
+ fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+ if (!gid_valid(fattr->gid))
+ goto out_gid;
p = xdr_decode_size3(p, &fattr->size);
p = xdr_decode_size3(p, &fattr->du.nfs3.used);
@@ -675,6 +679,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->valid |= NFS_ATTR_FATTR_V3;
return 0;
+out_uid:
+ dprintk("NFS: returned invalid uid\n");
+ return -EINVAL;
+out_gid:
+ dprintk("NFS: returned invalid gid\n");
+ return -EINVAL;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
@@ -943,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
* };
*/
static void encode_read3args(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -956,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -982,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
* };
*/
static void encode_write3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -998,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_write3args(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -1579,7 +1589,7 @@ out_default:
* };
*/
static int decode_read3resok(struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
u32 eof, count, ocount, recvd;
__be32 *p;
@@ -1615,7 +1625,7 @@ out_overflow:
}
static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -1663,7 +1673,7 @@ out_status:
* };
*/
static int decode_write3resok(struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
__be32 *p;
@@ -1687,7 +1697,7 @@ out_eio:
}
static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a..ba2affa5194 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,10 +9,20 @@
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
+#if defined(CONFIG_NFS_V4_2)
+#define NFS4_MAX_MINOR_VERSION 2
+#elif defined(CONFIG_NFS_V4_1)
+#define NFS4_MAX_MINOR_VERSION 1
+#else
+#define NFS4_MAX_MINOR_VERSION 0
+#endif
+
#if IS_ENABLED(CONFIG_NFS_V4)
#define NFS4_MAX_LOOP_ON_RECOVER (10)
+#include <linux/seqlock.h>
+
struct idmap;
enum nfs4_client_state {
@@ -27,6 +37,8 @@ enum nfs4_client_state {
NFS4CLNT_SERVER_SCOPE_MISMATCH,
NFS4CLNT_PURGE_STATE,
NFS4CLNT_BIND_CONN_TO_SESSION,
+ NFS4CLNT_MOVED,
+ NFS4CLNT_LEASE_MOVED,
};
#define NFS4_RENEW_TIMEOUT 0x01
@@ -34,19 +46,21 @@ enum nfs4_client_state {
struct nfs4_minor_version_ops {
u32 minor_version;
+ unsigned init_caps;
- int (*call_sync)(struct rpc_clnt *clnt,
- struct nfs_server *server,
- struct rpc_message *msg,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res);
+ int (*init_client)(struct nfs_client *);
+ void (*shutdown_client)(struct nfs_client *);
bool (*match_stateid)(const nfs4_stateid *,
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
+ int (*free_lock_state)(struct nfs_server *,
+ struct nfs4_lock_state *);
+ const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ const struct nfs4_mig_recovery_ops *mig_recovery_ops;
};
#define NFS_SEQID_CONFIRMED 1
@@ -90,6 +104,8 @@ struct nfs4_state_owner {
unsigned long so_flags;
struct list_head so_states;
struct nfs_seqid_counter so_seqid;
+ seqcount_t so_reclaim_seqcount;
+ struct mutex so_delegreturn_mutex;
};
enum {
@@ -128,6 +144,7 @@ struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */
#define NFS_LOCK_INITIALIZED 0
+#define NFS_LOCK_LOST 1
unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
@@ -139,12 +156,14 @@ struct nfs4_lock_state {
enum {
LK_STATE_IN_USE,
NFS_DELEGATED_STATE, /* Current stateid is delegation */
+ NFS_OPEN_STATE, /* OPEN stateid is set */
NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
+ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */
};
struct nfs4_state {
@@ -184,8 +203,7 @@ struct nfs4_state_recovery_ops {
int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
int (*recover_lock)(struct nfs4_state *, struct file_lock *);
int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
- struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
- int (*reclaim_complete)(struct nfs_client *);
+ int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *);
int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
struct rpc_cred *);
};
@@ -196,6 +214,12 @@ struct nfs4_state_maintenance_ops {
int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
};
+struct nfs4_mig_recovery_ops {
+ int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
+ struct page *, struct rpc_cred *);
+ int (*fsid_present)(struct inode *, struct rpc_cred *);
+};
+
extern const struct dentry_operations nfs4_dentry_operations;
/* dir.c */
@@ -206,15 +230,16 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
+int nfs4_replace_transport(struct nfs_server *server,
+ const struct nfs4_fs_locations *locations);
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
-extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
extern int nfs4_destroy_clientid(struct nfs_client *clp);
@@ -224,11 +249,17 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
+extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
+ struct page *page, struct rpc_cred *);
+extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
struct nfs_fh *, struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
-extern int nfs4_release_lockowner(struct nfs4_lock_state *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
+extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
+ const struct nfs_open_context *ctx,
+ const struct nfs_lock_context *l_ctx,
+ fmode_t fmode);
#if defined(CONFIG_NFS_V4_1)
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -236,12 +267,10 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
return server->nfs_client->cl_session;
}
-extern int nfs4_setup_sequence(const struct nfs_server *server,
- struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- struct rpc_task *task);
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
struct rpc_task *task);
+extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *);
extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
@@ -261,18 +290,63 @@ is_ds_client(struct nfs_client *clp)
{
return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
}
-#else /* CONFIG_NFS_v4_1 */
-static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
+
+static inline bool
+_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
+ struct rpc_clnt **clntp, struct rpc_message *msg)
{
- return NULL;
+ struct rpc_cred *newcred = NULL;
+ rpc_authflavor_t flavor;
+
+ if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_machine_cred != NULL)
+ /* don't call get_rpccred on the machine cred -
+ * a reference will be held for life of clp */
+ newcred = clp->cl_machine_cred;
+ spin_unlock(&clp->cl_lock);
+ msg->rpc_cred = newcred;
+
+ flavor = clp->cl_rpcclient->cl_auth->au_flavor;
+ WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I &&
+ flavor != RPC_AUTH_GSS_KRB5P);
+ *clntp = clp->cl_rpcclient;
+
+ return true;
+ }
+ return false;
}
-static inline int nfs4_setup_sequence(const struct nfs_server *server,
- struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
- struct rpc_task *task)
+/*
+ * Function responsible for determining if an rpc_message should use the
+ * machine cred under SP4_MACH_CRED and if so switching the credential and
+ * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p).
+ * Should be called before rpc_call_sync/rpc_call_async.
+ */
+static inline void
+nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
+ struct rpc_clnt **clntp, struct rpc_message *msg)
{
- rpc_call_start(task);
- return 0;
+ _nfs4_state_protect(clp, sp4_mode, clntp, msg);
+}
+
+/*
+ * Special wrapper to nfs4_state_protect for write.
+ * If WRITE can use machine cred but COMMIT cannot, make sure all writes
+ * that use machine cred use NFS_FILE_SYNC.
+ */
+static inline void
+nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
+{
+ if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
+ !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
+ wdata->args.stable = NFS_FILE_SYNC;
+}
+#else /* CONFIG_NFS_V4_1 */
+static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
+{
+ return NULL;
}
static inline bool
@@ -286,16 +360,32 @@ is_ds_client(struct nfs_client *clp)
{
return false;
}
+
+static inline void
+nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
+ struct rpc_clnt **clntp, struct rpc_message *msg)
+{
+}
+
+static inline void
+nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
extern const u32 nfs4_fattr_bitmap[3];
-extern const u32 nfs4_statfs_bitmap[2];
-extern const u32 nfs4_pathconf_bitmap[2];
+extern const u32 nfs4_statfs_bitmap[3];
+extern const u32 nfs4_pathconf_bitmap[3];
extern const u32 nfs4_fsinfo_bitmap[3];
-extern const u32 nfs4_fs_locations_bitmap[2];
+extern const u32 nfs4_fs_locations_bitmap[3];
+void nfs40_shutdown_client(struct nfs_client *);
+void nfs41_shutdown_client(struct nfs_client *);
+int nfs40_init_client(struct nfs_client *);
+int nfs41_init_client(struct nfs_client *);
void nfs4_free_client(struct nfs_client *);
struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
@@ -307,7 +397,7 @@ extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
/* nfs4state.c */
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
+struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
int nfs4_discover_server_trunking(struct nfs_client *clp,
@@ -315,7 +405,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
int nfs40_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, struct rpc_cred *);
#if defined(CONFIG_NFS_V4_1)
-struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
int nfs41_discover_server_trunking(struct nfs_client *clp,
struct nfs_client **, struct rpc_cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
@@ -338,18 +427,21 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs_inode_find_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
+extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
-extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
+extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
+extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
+extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_server_scope(struct nfs_client *,
struct nfs41_server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
+extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
fmode_t, const struct nfs_lockowner *);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
@@ -370,6 +462,7 @@ struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct
extern bool nfs4_disable_idmapping;
extern unsigned short max_session_slots;
extern unsigned short send_implementation_id;
+extern bool recover_lost_locks;
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
@@ -408,10 +501,27 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
return memcmp(dst, src, sizeof(*dst)) == 0;
}
+static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
+}
+
+static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
+{
+ return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
+}
+
+static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
+{
+ return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
+}
+
#else
#define nfs4_close_state(a, b) do { } while (0)
#define nfs4_close_sync(a, b) do { } while (0)
+#define nfs4_state_protect(a, b, c, d) do { } while (0)
+#define nfs4_state_protect_write(a, b, c, d) do { } while (0)
#endif /* CONFIG_NFS_V4 */
#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc34726812..aa9ef487604 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -6,9 +6,11 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_idmap.h>
#include <linux/nfs_mount.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/bc_xprt.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#include "internal.h"
#include "callback.h"
#include "delegation.h"
@@ -29,31 +31,149 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
if (clp->rpc_ops->version != 4 || minorversion != 0)
return ret;
-retry:
- if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
- return -ENOMEM;
+ idr_preload(GFP_KERNEL);
spin_lock(&nn->nfs_client_lock);
- ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
+ ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+ if (ret >= 0)
+ clp->cl_cb_ident = ret;
spin_unlock(&nn->nfs_client_lock);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ idr_preload_end();
+ return ret < 0 ? ret : 0;
}
#ifdef CONFIG_NFS_V4_1
-static void nfs4_shutdown_session(struct nfs_client *clp)
+/**
+ * Per auth flavor data server rpc clients
+ */
+struct nfs4_ds_server {
+ struct list_head list; /* ds_clp->cl_ds_clients */
+ struct rpc_clnt *rpc_clnt;
+};
+
+/**
+ * Common lookup case for DS I/O
+ */
+static struct nfs4_ds_server *
+nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
+{
+ struct nfs4_ds_server *dss;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(dss, &ds_clp->cl_ds_clients, list) {
+ if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
+ continue;
+ goto out;
+ }
+ dss = NULL;
+out:
+ rcu_read_unlock();
+ return dss;
+}
+
+static struct nfs4_ds_server *
+nfs4_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor,
+ struct nfs4_ds_server *new)
+{
+ struct nfs4_ds_server *dss;
+
+ spin_lock(&ds_clp->cl_lock);
+ list_for_each_entry(dss, &ds_clp->cl_ds_clients, list) {
+ if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
+ continue;
+ goto out;
+ }
+ if (new)
+ list_add_rcu(&new->list, &ds_clp->cl_ds_clients);
+ dss = new;
+out:
+ spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */
+ return dss;
+}
+
+static struct nfs4_ds_server *
+nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
+{
+ struct nfs4_ds_server *dss;
+
+ dss = kmalloc(sizeof(*dss), GFP_NOFS);
+ if (dss == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor);
+ if (IS_ERR(dss->rpc_clnt)) {
+ int err = PTR_ERR(dss->rpc_clnt);
+ kfree (dss);
+ return ERR_PTR(err);
+ }
+ INIT_LIST_HEAD(&dss->list);
+
+ return dss;
+}
+
+static void
+nfs4_free_ds_server(struct nfs4_ds_server *dss)
+{
+ rpc_release_client(dss->rpc_clnt);
+ kfree(dss);
+}
+
+/**
+* Find or create a DS rpc client with the MDS server rpc client auth flavor
+* in the nfs_client cl_ds_clients list.
+*/
+struct rpc_clnt *
+nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
+{
+ struct nfs4_ds_server *dss, *new;
+ rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor;
+
+ dss = nfs4_find_ds_client(ds_clp, flavor);
+ if (dss != NULL)
+ goto out;
+ new = nfs4_alloc_ds_server(ds_clp, flavor);
+ if (IS_ERR(new))
+ return ERR_CAST(new);
+ dss = nfs4_add_ds_client(ds_clp, flavor, new);
+ if (dss != new)
+ nfs4_free_ds_server(new);
+out:
+ return dss->rpc_clnt;
+}
+EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client);
+
+static void
+nfs4_shutdown_ds_clients(struct nfs_client *clp)
+{
+ struct nfs4_ds_server *dss;
+ LIST_HEAD(shutdown_list);
+
+ while (!list_empty(&clp->cl_ds_clients)) {
+ dss = list_entry(clp->cl_ds_clients.next,
+ struct nfs4_ds_server, list);
+ list_del(&dss->list);
+ rpc_shutdown_client(dss->rpc_clnt);
+ kfree (dss);
+ }
+}
+
+void nfs41_shutdown_client(struct nfs_client *clp)
{
if (nfs4_has_session(clp)) {
+ nfs4_shutdown_ds_clients(clp);
nfs4_destroy_session(clp->cl_session);
nfs4_destroy_clientid(clp);
}
}
-#else /* CONFIG_NFS_V4_1 */
-static void nfs4_shutdown_session(struct nfs_client *clp)
+#endif /* CONFIG_NFS_V4_1 */
+
+void nfs40_shutdown_client(struct nfs_client *clp)
{
+ if (clp->cl_slot_tbl) {
+ nfs4_shutdown_slot_table(clp->cl_slot_tbl);
+ kfree(clp->cl_slot_tbl);
+ }
}
-#endif /* CONFIG_NFS_V4_1 */
struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
{
@@ -66,12 +186,19 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
if (err)
goto error;
+ if (cl_init->minorversion > NFS4_MAX_MINOR_VERSION) {
+ err = -EINVAL;
+ goto error;
+ }
+
spin_lock_init(&clp->cl_lock);
INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
+ INIT_LIST_HEAD(&clp->cl_ds_clients);
rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_minorversion = cl_init->minorversion;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ clp->cl_mig_gen = 1;
return clp;
error:
@@ -92,7 +219,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
nfs4_kill_renewd(clp);
- nfs4_shutdown_session(clp);
+ clp->cl_mvops->shutdown_client(clp);
nfs4_destroy_callback(clp);
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
@@ -139,34 +266,77 @@ static int nfs4_init_callback(struct nfs_client *clp)
return 0;
}
+/**
+ * nfs40_init_client - nfs_client initialization tasks for NFSv4.0
+ * @clp: nfs_client to initialize
+ *
+ * Returns zero on success, or a negative errno if some error occurred.
+ */
+int nfs40_init_client(struct nfs_client *clp)
+{
+ struct nfs4_slot_table *tbl;
+ int ret;
+
+ tbl = kzalloc(sizeof(*tbl), GFP_NOFS);
+ if (tbl == NULL)
+ return -ENOMEM;
+
+ ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
+ "NFSv4.0 transport Slot table");
+ if (ret) {
+ kfree(tbl);
+ return ret;
+ }
+
+ clp->cl_slot_tbl = tbl;
+ return 0;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+
+/**
+ * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+
+ * @clp - nfs_client to initialize
+ *
+ * Returns zero on success, or a negative errno if some error occurred.
+ */
+int nfs41_init_client(struct nfs_client *clp)
+{
+ struct nfs4_session *session = NULL;
+
+ /*
+ * Create the session and mark it expired.
+ * When a SEQUENCE operation encounters the expired session
+ * it will do session recovery to initialize it.
+ */
+ session = nfs4_alloc_session(clp);
+ if (!session)
+ return -ENOMEM;
+
+ clp->cl_session = session;
+
+ /*
+ * The create session reply races with the server back
+ * channel probe. Mark the client NFS_CS_SESSION_INITING
+ * so that the client back channel can find the
+ * nfs_client struct
+ */
+ nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
+ return 0;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
/*
* Initialize the minor version specific parts of an NFS4 client record
*/
static int nfs4_init_client_minor_version(struct nfs_client *clp)
{
-#if defined(CONFIG_NFS_V4_1)
- if (clp->cl_mvops->minor_version) {
- struct nfs4_session *session = NULL;
- /*
- * Create the session and mark it expired.
- * When a SEQUENCE operation encounters the expired session
- * it will do session recovery to initialize it.
- */
- session = nfs4_alloc_session(clp);
- if (!session)
- return -ENOMEM;
-
- clp->cl_session = session;
- /*
- * The create session reply races with the server back
- * channel probe. Mark the client NFS_CS_SESSION_INITING
- * so that the client back channel can find the
- * nfs_client struct
- */
- nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
- }
-#endif /* CONFIG_NFS_V4_1 */
+ int ret;
+ ret = clp->cl_mvops->init_client(clp);
+ if (ret)
+ return ret;
return nfs4_init_callback(clp);
}
@@ -182,8 +352,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
*/
struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour)
+ const char *ip_addr)
{
char buf[INET6_ADDRSTRLEN + 1];
struct nfs_client *old;
@@ -198,8 +367,14 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
+ if (clp->cl_minorversion != 0)
+ __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
- error = nfs_create_rpc_client(clp, timeparms, authflavour);
+ __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I);
+ if (error == -EINVAL)
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX);
if (error < 0)
goto error;
@@ -236,14 +411,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
- if (clp != old) {
- clp->cl_preserve_clid = true;
- nfs_put_client(clp);
- clp = old;
- atomic_inc(&clp->cl_count);
- }
- return clp;
+ if (clp != old)
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ return old;
error:
nfs_mark_client_ready(clp, error);
@@ -301,18 +473,32 @@ int nfs40_walk_client_list(struct nfs_client *new,
struct rpc_cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
- struct nfs_client *pos, *n, *prev = NULL;
+ struct nfs_client *pos, *prev = NULL;
struct nfs4_setclientid_res clid = {
.clientid = new->cl_clientid,
.confirm = new->cl_confirm,
};
- int status;
+ int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
- list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+ list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos" */
- if (pos->cl_cons_state < NFS_CS_READY)
+ if (pos->cl_cons_state > NFS_CS_READY) {
+ atomic_inc(&pos->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+
+ if (prev)
+ nfs_put_client(prev);
+ prev = pos;
+
+ status = nfs_wait_client_init_complete(pos);
+ if (status < 0)
+ goto out;
+ status = -NFS4ERR_STALE_CLIENTID;
+ spin_lock(&nn->nfs_client_lock);
+ }
+ if (pos->cl_cons_state != NFS_CS_READY)
continue;
if (pos->rpc_ops != new->rpc_ops)
@@ -332,40 +518,40 @@ int nfs40_walk_client_list(struct nfs_client *new,
if (prev)
nfs_put_client(prev);
+ prev = pos;
status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
- if (status == 0) {
+ switch (status) {
+ case -NFS4ERR_STALE_CLIENTID:
+ break;
+ case 0:
nfs4_swap_callback_idents(pos, new);
- nfs_put_client(pos);
+ prev = NULL;
*result = pos;
dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
__func__, pos, atomic_read(&pos->cl_count));
- return 0;
- }
- if (status != -NFS4ERR_STALE_CLIENTID) {
- nfs_put_client(pos);
- dprintk("NFS: <-- %s status = %d, no result\n",
- __func__, status);
- return status;
+ goto out;
+ case -ERESTARTSYS:
+ case -ETIMEDOUT:
+ /* The callback path may have been inadvertently
+ * changed. Schedule recovery!
+ */
+ nfs4_schedule_path_down_recovery(pos);
+ default:
+ goto out;
}
spin_lock(&nn->nfs_client_lock);
- prev = pos;
}
+ spin_unlock(&nn->nfs_client_lock);
- /*
- * No matching nfs_client found. This should be impossible,
- * because the new nfs_client has already been added to
- * nfs_client_list by nfs_get_client().
- *
- * Don't BUG(), since the caller is holding a mutex.
- */
+ /* No match found. The server lost our clientid */
+out:
if (prev)
nfs_put_client(prev);
- spin_unlock(&nn->nfs_client_lock);
- pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
- return -NFS4ERR_STALE_CLIENTID;
+ dprintk("NFS: <-- %s status = %d\n", __func__, status);
+ return status;
}
#ifdef CONFIG_NFS_V4_1
@@ -431,16 +617,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
struct rpc_cred *cred)
{
struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
- struct nfs_client *pos, *n, *prev = NULL;
- int error;
+ struct nfs_client *pos, *prev = NULL;
+ int status = -NFS4ERR_STALE_CLIENTID;
spin_lock(&nn->nfs_client_lock);
- list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+ list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos", especially the client
* ID and serverowner fields. Wait for CREATE_SESSION
* to finish. */
- if (pos->cl_cons_state < NFS_CS_READY) {
+ if (pos->cl_cons_state > NFS_CS_READY) {
atomic_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
@@ -448,15 +634,18 @@ int nfs41_walk_client_list(struct nfs_client *new,
nfs_put_client(prev);
prev = pos;
- error = nfs_wait_client_init_complete(pos);
- if (error < 0) {
- nfs_put_client(pos);
- spin_lock(&nn->nfs_client_lock);
- continue;
+ status = nfs_wait_client_init_complete(pos);
+ if (status == 0) {
+ nfs4_schedule_lease_recovery(pos);
+ status = nfs4_wait_clnt_recover(pos);
}
-
spin_lock(&nn->nfs_client_lock);
+ if (status < 0)
+ break;
+ status = -NFS4ERR_STALE_CLIENTID;
}
+ if (pos->cl_cons_state != NFS_CS_READY)
+ continue;
if (pos->rpc_ops != new->rpc_ops)
continue;
@@ -473,24 +662,20 @@ int nfs41_walk_client_list(struct nfs_client *new,
if (!nfs4_match_serverowners(pos, new))
continue;
- spin_unlock(&nn->nfs_client_lock);
+ atomic_inc(&pos->cl_count);
+ *result = pos;
+ status = 0;
dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
__func__, pos, atomic_read(&pos->cl_count));
-
- *result = pos;
- return 0;
+ break;
}
- /*
- * No matching nfs_client found. This should be impossible,
- * because the new nfs_client has already been added to
- * nfs_client_list by nfs_get_client().
- *
- * Don't BUG(), since the caller is holding a mutex.
- */
+ /* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
- pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
- return -NFS4ERR_STALE_CLIENTID;
+ dprintk("NFS: <-- %s status = %d\n", __func__, status);
+ if (prev)
+ nfs_put_client(prev);
+ return status;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -555,14 +740,14 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr,
*/
struct nfs_client *
nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
- struct nfs4_sessionid *sid)
+ struct nfs4_sessionid *sid, u32 minorversion)
{
struct nfs_client *clp;
struct nfs_net *nn = net_generic(net, nfs_net_id);
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, 1) == false)
+ if (nfs4_cb_match_client(addr, clp, minorversion) == false)
continue;
if (!nfs4_has_session(clp))
@@ -585,7 +770,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
struct nfs_client *
nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
- struct nfs4_sessionid *sid)
+ struct nfs4_sessionid *sid, u32 minorversion)
{
return NULL;
}
@@ -619,6 +804,8 @@ static int nfs4_set_client(struct nfs_server *server,
if (server->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (server->options & NFS_OPTION_MIGRATION)
+ set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
@@ -709,7 +896,7 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
}
static int nfs4_server_common_setup(struct nfs_server *server,
- struct nfs_fh *mntfh)
+ struct nfs_fh *mntfh, bool auth_probe)
{
struct nfs_fattr *fattr;
int error;
@@ -723,19 +910,32 @@ static int nfs4_server_common_setup(struct nfs_server *server,
return -ENOMEM;
/* We must ensure the session is initialised first */
- error = nfs4_init_session(server);
+ error = nfs4_init_session(server->nfs_client);
if (error < 0)
goto out;
+ /* Set the basic capabilities */
+ server->caps |= server->nfs_client->cl_mvops->init_caps;
+ if (server->flags & NFS_MOUNT_NORDIRPLUS)
+ server->caps &= ~NFS_CAP_READDIRPLUS;
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping &&
+ server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+
+
/* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
+ error = nfs4_get_rootfh(server, mntfh, auth_probe);
if (error < 0)
goto out;
dprintk("Server FSID: %llx:%llx\n",
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
+ nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
nfs4_session_set_rwsize(server);
@@ -758,7 +958,7 @@ out:
* Create a version 4 volume record
*/
static int nfs4_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data)
+ struct nfs_parsed_mount_data *data)
{
struct rpc_timeout timeparms;
int error;
@@ -770,10 +970,16 @@ static int nfs4_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = data->flags;
- server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
- if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
- server->caps |= NFS_CAP_READDIRPLUS;
server->options = data->options;
+ server->auth_info = data->auth_info;
+
+ /* Use the first specified auth flavor. If this flavor isn't
+ * allowed by the server, use the SECINFO path to try the
+ * other specified flavors */
+ if (data->auth_info.flavor_len >= 1)
+ data->selected_flavor = data->auth_info.flavors[0];
+ else
+ data->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
error = nfs4_set_client(server,
@@ -781,7 +987,7 @@ static int nfs4_init_server(struct nfs_server *server,
(const struct sockaddr *)&data->nfs_server.address,
data->nfs_server.addrlen,
data->client_address,
- data->auth_flavors[0],
+ data->selected_flavor,
data->nfs_server.protocol,
&timeparms,
data->minorversion,
@@ -789,13 +995,6 @@ static int nfs4_init_server(struct nfs_server *server,
if (error < 0)
goto error;
- /*
- * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
- * authentication.
- */
- if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
- server->caps |= NFS_CAP_UIDGID_NOMAP;
-
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
if (data->wsize)
@@ -808,7 +1007,8 @@ static int nfs4_init_server(struct nfs_server *server,
server->port = data->nfs_server.port;
- error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms,
+ data->selected_flavor);
error:
/* Done */
@@ -826,6 +1026,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
struct nfs_subversion *nfs_mod)
{
struct nfs_server *server;
+ bool auth_probe;
int error;
dprintk("--> nfs4_create_server()\n");
@@ -834,12 +1035,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
if (!server)
return ERR_PTR(-ENOMEM);
+ auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
+
/* set up the general RPC client */
error = nfs4_init_server(server, mount_info->parsed);
if (error < 0)
goto error;
- error = nfs4_server_common_setup(server, mount_info->mntfh);
+ error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe);
if (error < 0)
goto error;
@@ -860,6 +1063,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
+ bool auth_probe;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -873,7 +1077,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Initialise the client representation from the parent server */
nfs_server_copy_userdata(server, parent_server);
- server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
/* Get a client representation.
* Note: NFSv4 always uses TCP, */
@@ -893,7 +1096,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- error = nfs4_server_common_setup(server, mntfh);
+ auth_probe = parent_server->auth_info.flavor_len < 1;
+
+ error = nfs4_server_common_setup(server, mntfh, auth_probe);
if (error < 0)
goto error;
@@ -905,3 +1110,112 @@ error:
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
}
+
+/*
+ * Grab the destination's particulars, including lease expiry time.
+ *
+ * Probes the server with the filehandle of the superblock's root
+ * inode, using a temporary nfs_fattr that is freed before returning.
+ *
+ * Returns zero if probe succeeded and retrieved FSID matches the FSID
+ * we have cached.
+ * NOTE(review): no FSID comparison is visible in this function;
+ * presumably nfs_probe_fsinfo() performs it - confirm.
+ *
+ * Returns -ENOMEM if the nfs_fattr cannot be allocated, otherwise the
+ * result of nfs_probe_fsinfo().
+ */
+static int nfs_probe_destination(struct nfs_server *server)
+{
+ struct inode *inode = server->super->s_root->d_inode;
+ struct nfs_fattr *fattr;
+ int error;
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* Sanity: the probe won't work if the destination server
+ * does not recognize the migrated FH. */
+ error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr);
+
+ nfs_free_fattr(fattr); /* attributes are scratch only; just the status is kept */
+ return error;
+}
+
+/**
+ * nfs4_update_server - Move an nfs_server to a different nfs_client
+ *
+ * @server: represents FSID to be moved
+ * @hostname: new end-point's hostname
+ * @sap: new end-point's socket address
+ * @salen: size of "sap"
+ * @net: net namespace
+ *
+ * The nfs_server must be quiescent before this function is invoked.
+ * Either its session is drained (NFSv4.1+), or its transport is
+ * plugged and drained (NFSv4.0).
+ *
+ * The server's RPC transport is switched to the new end-point first.
+ * The server is then taken off its lists, bound to an nfs_client for
+ * the destination with nfs4_set_client(), put back on the lists and
+ * finally probed with nfs_probe_destination().
+ *
+ * The reference held on the previous nfs_client is dropped only after
+ * nfs4_set_client() has succeeded. On failure the server is put back
+ * on the lists with that reference intact.
+ *
+ * Returns zero on success, or a negative errno value. -ENOMEM is
+ * returned if the new client's hostname cannot be duplicated.
+ */
+int nfs4_update_server(struct nfs_server *server, const char *hostname,
+ struct sockaddr *sap, size_t salen, struct net *net)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct rpc_clnt *clnt = server->client;
+ struct xprt_create xargs = {
+ .ident = clp->cl_proto,
+ .net = net,
+ .dstaddr = sap,
+ .addrlen = salen,
+ .servername = hostname,
+ };
+ char buf[INET6_ADDRSTRLEN + 1];
+ struct sockaddr_storage address;
+ struct sockaddr *localaddr = (struct sockaddr *)&address;
+ int error;
+
+ dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ hostname);
+
+ error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
+ if (error != 0) {
+ dprintk("<-- %s(): rpc_switch_client_transport returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ error = rpc_localaddr(clnt, localaddr, sizeof(address));
+ if (error != 0) {
+ dprintk("<-- %s(): rpc_localaddr returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ /* buf receives the presentation form of our local address */
+ error = -EAFNOSUPPORT;
+ if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) {
+ dprintk("<-- %s(): rpc_ntop returned %d\n",
+ __func__, error);
+ goto out;
+ }
+
+ nfs_server_remove_lists(server);
+ error = nfs4_set_client(server, hostname, sap, salen, buf,
+ clp->cl_rpcclient->cl_auth->au_flavor,
+ clp->cl_proto, clnt->cl_timeout,
+ clp->cl_minorversion, net);
+ if (error != 0) {
+ /*
+ * The server goes back on the lists still bound to "clp",
+ * so the reference on it must not be dropped here.
+ * NOTE(review): assumes nfs4_set_client() leaves
+ * server->nfs_client untouched when it fails - confirm.
+ */
+ nfs_server_insert_lists(server);
+ dprintk("<-- %s(): nfs4_set_client returned %d\n",
+ __func__, error);
+ goto out;
+ }
+ /* The server is bound to its new client; release the old one */
+ nfs_put_client(clp);
+
+ if (server->nfs_client->cl_hostname == NULL) {
+ server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (server->nfs_client->cl_hostname == NULL)
+ error = -ENOMEM;
+ }
+ /* Re-insert even on allocation failure so the server stays listed */
+ nfs_server_insert_lists(server);
+ if (error != 0)
+ goto out;
+
+ error = nfs_probe_destination(server);
+ if (error < 0)
+ goto out;
+
+ dprintk("<-- %s() succeeded\n", __func__);
+
+out:
+ return error;
+}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 08ddcccb888..a816f0627a6 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
struct inode *dir;
unsigned openflags = filp->f_flags;
struct iattr attr;
+ int opened = 0;
int err;
/*
@@ -30,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
* -EOPENSTALE. The VFS will retry the lookup/create/open.
*/
- dprintk("NFS: open file(%s/%s)\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
+ dprintk("NFS: open file(%pd2)\n", dentry);
if ((openflags & O_ACCMODE) == 3)
openflags--;
@@ -55,7 +54,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_wb_all(inode);
}
- inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
switch (err) {
@@ -69,13 +68,12 @@ nfs4_file_open(struct inode *inode, struct file *filp)
goto out_drop;
}
}
- iput(inode);
if (inode != dentry->d_inode)
goto out_drop;
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
nfs_file_set_open_context(filp, ctx);
- nfs_fscache_set_inode_cookie(inode, filp);
+ nfs_fscache_open_file(inode, filp);
err = 0;
out_put_ctx:
@@ -94,7 +92,7 @@ static int
nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
- struct inode *inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(file);
do {
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -102,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
break;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- if (!ret && !datasync)
- /* application has asked for meta-data sync */
+ if (!ret)
ret = pnfs_layoutcommit_inode(inode, true);
mutex_unlock(&inode->i_mutex);
/*
@@ -120,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct file_operations nfs4_file_operations = {
.llseek = nfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = nfs_file_read,
+ .write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs4_file_open,
.flush = nfs_file_flush,
@@ -132,7 +129,7 @@ const struct file_operations nfs4_file_operations = {
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
- .splice_write = nfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
};
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 549462e5b9b..c0b3a16b4a0 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -9,7 +9,7 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
-int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe)
{
struct nfs_fsinfo fsinfo;
int ret = -ENOMEM;
@@ -21,7 +21,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
/* Start by getting the root filehandle from the server */
- ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
+ ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe);
if (ret < 0) {
dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
goto out;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 1e09eb78543..3d83cb1fdc7 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -11,9 +11,11 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/vfs.h>
#include <linux/inet.h>
#include "internal.h"
@@ -119,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry,
}
static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct nfs_server *server)
+ struct sockaddr *sa, size_t salen, struct net *net)
{
- struct net *net = rpc_net_ns(server->client);
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
@@ -133,73 +134,104 @@ static size_t nfs_parse_server_name(char *string, size_t len,
return ret;
}
-rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+/**
+ * nfs_find_best_sec - Find a security mechanism supported locally
+ * @clnt: RPC client that is cloned for the selected flavor
+ * @server: NFS server struct
+ * @flavors: List of security tuples returned by SECINFO procedure
+ *
+ * Return an rpc client that uses the first security mechanism in
+ * "flavors" that is locally supported. The "flavors" array
+ * is searched in the order returned from the server, per RFC 3530
+ * recommendation, and each flavor is checked for membership in the
+ * sec= mount option list if it exists.
+ *
+ * Return ERR_PTR(-EPERM) if no matching flavor is found in the array.
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ *
+ */
+static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
+ struct nfs_server *server,
+ struct nfs4_secinfo_flavors *flavors)
{
- struct gss_api_mech *mech;
- struct xdr_netobj oid;
- int i;
- rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
+ rpc_authflavor_t pflavor;
+ struct nfs4_secinfo4 *secinfo;
+ unsigned int i;
for (i = 0; i < flavors->num_flavors; i++) {
- struct nfs4_secinfo_flavor *flavor;
- flavor = &flavors->flavors[i];
-
- if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
- pseudoflavor = flavor->flavor;
- break;
- } else if (flavor->flavor == RPC_AUTH_GSS) {
- oid.len = flavor->gss.sec_oid4.len;
- oid.data = flavor->gss.sec_oid4.data;
- mech = gss_mech_get_by_OID(&oid);
- if (!mech)
- continue;
- pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
- gss_mech_put(mech);
- break;
+ secinfo = &flavors->flavors[i];
+
+ switch (secinfo->flavor) {
+ case RPC_AUTH_NULL:
+ case RPC_AUTH_UNIX:
+ case RPC_AUTH_GSS:
+ pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+ &secinfo->flavor_info);
+ /* does the pseudoflavor match a sec= mount opt? */
+ if (pflavor != RPC_AUTH_MAXFLAVOR &&
+ nfs_auth_info_match(&server->auth_info, pflavor)) {
+ struct rpc_clnt *new;
+ struct rpc_cred *cred;
+
+ /* Cloning creates an rpc_auth for the flavor */
+ new = rpc_clone_client_set_auth(clnt, pflavor);
+ if (IS_ERR(new))
+ continue;
+ /**
+ * Check that the user actually can use the
+ * flavor. This is mostly for RPC_AUTH_GSS
+ * where cr_init obtains a gss context
+ */
+ cred = rpcauth_lookupcred(new->cl_auth, 0);
+ if (IS_ERR(cred)) {
+ rpc_shutdown_client(new);
+ continue;
+ }
+ put_rpccred(cred);
+ return new;
+ }
}
}
-
- return pseudoflavor;
+ return ERR_PTR(-EPERM);
}
-static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+/**
+ * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
+ * return an rpc_clnt that uses the best available security flavor with
+ * respect to the secinfo flavor list and the sec= mount options.
+ *
+ * @clnt: RPC client to clone
+ * @inode: directory inode
+ * @name: lookup name
+ *
+ * Please call rpc_shutdown_client() when you are done with this rpc client.
+ */
+struct rpc_clnt *
+nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
+ struct qstr *name)
{
struct page *page;
struct nfs4_secinfo_flavors *flavors;
- rpc_authflavor_t flavor;
+ struct rpc_clnt *new;
int err;
page = alloc_page(GFP_KERNEL);
if (!page)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+
flavors = page_address(page);
err = nfs4_proc_secinfo(inode, name, flavors);
if (err < 0) {
- flavor = err;
+ new = ERR_PTR(err);
goto out;
}
- flavor = nfs_find_best_sec(flavors);
+ new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
out:
put_page(page);
- return flavor;
-}
-
-/*
- * Please call rpc_shutdown_client() when you are done with this client.
- */
-struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
- struct qstr *name)
-{
- rpc_authflavor_t flavor;
-
- flavor = nfs4_negotiate_security(inode, name);
- if ((int)flavor < 0)
- return ERR_PTR((int)flavor);
-
- return rpc_clone_client_set_auth(clnt, flavor);
+ return new;
}
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -207,6 +239,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
const struct nfs4_fs_location *location)
{
const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
struct vfsmount *mnt = ERR_PTR(-ENOENT);
char *mnt_path;
unsigned int maxbuflen;
@@ -232,8 +265,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
continue;
mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
- mountdata->addr, addr_bufsize,
- NFS_SB(mountdata->sb));
+ mountdata->addr, addr_bufsize, net);
if (mountdata->addrlen == 0)
continue;
@@ -276,8 +308,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
if (locations == NULL || locations->nlocations <= 0)
goto out;
- dprintk("%s: referral at %s/%s\n", __func__,
- dentry->d_parent->d_name.name, dentry->d_name.name);
+ dprintk("%s: referral at %pd2\n", __func__, dentry);
page = (char *) __get_free_page(GFP_USER);
if (!page)
@@ -341,8 +372,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
mnt = ERR_PTR(-ENOENT);
parent = dget_parent(dentry);
- dprintk("%s: getting locations for %s/%s\n",
- __func__, parent->d_name.name, dentry->d_name.name);
+ dprintk("%s: getting locations for %pd2\n",
+ __func__, dentry);
err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
@@ -363,21 +394,130 @@ out:
struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
+ rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
struct dentry *parent = dget_parent(dentry);
+ struct inode *dir = parent->d_inode;
+ struct qstr *name = &dentry->d_name;
struct rpc_clnt *client;
struct vfsmount *mnt;
/* Look it up again to get its attributes and sec flavor */
- client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
+ client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
dput(parent);
if (IS_ERR(client))
return ERR_CAST(client);
- if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+ if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
mnt = nfs_do_refmount(client, dentry);
- else
- mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
+ goto out;
+ }
+ if (client->cl_auth->au_flavor != flavor)
+ flavor = client->cl_auth->au_flavor;
+ mnt = nfs_do_submount(dentry, fh, fattr, flavor);
+out:
rpc_shutdown_client(client);
return mnt;
}
+
+/*
+ * Try one location from the fs_locations array.
+ *
+ * @server: nfs_server handed to nfs4_update_server() for each candidate
+ * @page, @page2: not referenced anywhere in this function's body
+ * @location: entry whose servers[] array is walked in order
+ *
+ * Each server string is parsed into a socket address, given port
+ * NFS_PORT and tried with nfs4_update_server(). The walk stops at the
+ * first success, or when duplicating the hostname fails.
+ *
+ * Returns zero on success, or a negative errno value: -ENOENT when no
+ * entry got as far as an update attempt, -ENOMEM on allocation
+ * failure, otherwise the result of the last nfs4_update_server() call.
+ */
+static int nfs4_try_replacing_one_location(struct nfs_server *server,
+ char *page, char *page2,
+ const struct nfs4_fs_location *location)
+{
+ const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ struct net *net = rpc_net_ns(server->client);
+ struct sockaddr *sap;
+ unsigned int s;
+ size_t salen;
+ int error;
+
+ sap = kmalloc(addr_bufsize, GFP_KERNEL);
+ if (sap == NULL)
+ return -ENOMEM;
+
+ error = -ENOENT; /* result if every server entry is skipped */
+ for (s = 0; s < location->nservers; s++) {
+ const struct nfs4_string *buf = &location->servers[s];
+ char *hostname;
+
+ if (buf->len <= 0 || buf->len > PAGE_SIZE)
+ continue;
+
+ if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL)
+ continue; /* entries containing a scope delimiter are not tried */
+
+ salen = nfs_parse_server_name(buf->data, buf->len,
+ sap, addr_bufsize, net);
+ if (salen == 0)
+ continue; /* zero length: no address was produced for this entry */
+ rpc_set_port(sap, NFS_PORT);
+
+ error = -ENOMEM;
+ hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
+ if (hostname == NULL)
+ break; /* give up on the remaining servers, returning -ENOMEM */
+
+ error = nfs4_update_server(server, hostname, sap, salen, net);
+ kfree(hostname); /* the copy is only needed for the call above */
+ if (error == 0)
+ break;
+ }
+
+ kfree(sap);
+ return error;
+}
+
+/**
+ * nfs4_replace_transport - set up transport to destination server
+ *
+ * @server: export being migrated
+ * @locations: fs_locations array
+ *
+ * The client tries all the entries in the "locations" array, in the
+ * order returned by the server, until one works or the end of the
+ * array is reached. Entries with no servers or an empty rootpath
+ * are skipped.
+ *
+ * Returns zero on success, or a negative errno value: -ENOENT if
+ * "locations" is NULL or empty or holds no usable entry, -ENOMEM if
+ * a scratch page cannot be allocated, otherwise the result of the
+ * last nfs4_try_replacing_one_location() call.
+ */
+int nfs4_replace_transport(struct nfs_server *server,
+ const struct nfs4_fs_locations *locations)
+{
+ char *page = NULL, *page2 = NULL;
+ int loc, error;
+
+ error = -ENOENT;
+ if (locations == NULL || locations->nlocations <= 0)
+ goto out;
+
+ error = -ENOMEM;
+ page = (char *) __get_free_page(GFP_USER);
+ if (!page)
+ goto out;
+ page2 = (char *) __get_free_page(GFP_USER);
+ if (!page2)
+ goto out;
+
+ /*
+ * Both pages were obtained, so -ENOMEM no longer applies: if every
+ * entry below is skipped the caller must see -ENOENT instead.
+ */
+ error = -ENOENT;
+ for (loc = 0; loc < locations->nlocations; loc++) {
+ const struct nfs4_fs_location *location =
+ &locations->locations[loc];
+
+ if (location == NULL || location->nservers <= 0 ||
+ location->rootpath.ncomponents == 0)
+ continue;
+
+ error = nfs4_try_replacing_one_location(server, page,
+ page2, location);
+ if (error == 0)
+ break;
+ }
+
+out:
+ /*
+ * NOTE(review): reached with one or both pages still NULL on the
+ * early exits; relies on free_page() accepting 0 - confirm.
+ */
+ free_page((unsigned long)page);
+ free_page((unsigned long)page2);
+ return error;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf747ef8665..4bf3d97cc5a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -66,6 +66,8 @@
#include "nfs4session.h"
#include "fscache.h"
+#include "nfs4trace.h"
+
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_POLL_RETRY_MIN (HZ/10)
@@ -77,15 +79,65 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
-static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *);
-static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
+static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
- struct nfs4_state *state);
+ struct nfs4_state *state, struct nfs4_label *ilabel,
+ struct nfs4_label *olabel);
#ifdef CONFIG_NFS_V4_1
-static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *);
-static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *);
+static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
+ struct rpc_cred *);
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *,
+ struct rpc_cred *);
+#endif
+
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL /* real helpers; the #else branch holds no-op stubs */
+static inline struct nfs4_label *
+nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr, struct nfs4_label *label)
+{
+ int err;
+
+ if (label == NULL)
+ return NULL;
+
+ if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0)
+ return NULL; /* NFS_CAP_SECURITY_LABEL not set for dir: no label */
+
+ err = security_dentry_init_security(dentry, sattr->ia_mode,
+ &dentry->d_name, (void **)&label->label, &label->len);
+ if (err == 0)
+ return label; /* hand the caller's own label structure back */
+
+ return NULL; /* any hook error is reported simply as "no label" */
+}
+static inline void
+nfs4_label_release_security(struct nfs4_label *label)
+{
+ if (label) /* a NULL label is accepted and ignored */
+ security_release_secctx(label->label, label->len);
+}
+static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
+{
+ if (label)
+ return server->attr_bitmask;
+
+ return server->attr_bitmask_nl; /* presumably the "no label" bitmask - TODO confirm */
+}
+#else
+static inline struct nfs4_label *
+nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
+ struct iattr *sattr, struct nfs4_label *l)
+{ return NULL; } /* stub: never produces a label */
+static inline void
+nfs4_label_release_security(struct nfs4_label *label)
+{ return; } /* stub: nothing to release */
+static inline u32 *
+nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
+{ return server->attr_bitmask; } /* stub: label argument is ignored */
#endif
+
/* Prevent leaks of NFSv4 errors into userland */
static int nfs4_map_errors(int err)
{
@@ -93,8 +145,11 @@ static int nfs4_map_errors(int err)
return err;
switch (err) {
case -NFS4ERR_RESOURCE:
+ case -NFS4ERR_LAYOUTTRYLATER:
+ case -NFS4ERR_RECALLCONFLICT:
return -EREMOTEIO;
case -NFS4ERR_WRONGSEC:
+ case -NFS4ERR_WRONG_CRED:
return -EPERM;
case -NFS4ERR_BADOWNER:
case -NFS4ERR_BADNAME:
@@ -105,6 +160,8 @@ static int nfs4_map_errors(int err)
return -EPROTONOSUPPORT;
case -NFS4ERR_ACCESS:
return -EACCES;
+ case -NFS4ERR_FILE_OPEN:
+ return -EBUSY;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -130,7 +187,10 @@ const u32 nfs4_fattr_bitmap[3] = {
| FATTR4_WORD1_SPACE_USED
| FATTR4_WORD1_TIME_ACCESS
| FATTR4_WORD1_TIME_METADATA
- | FATTR4_WORD1_TIME_MODIFY
+ | FATTR4_WORD1_TIME_MODIFY,
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+ FATTR4_WORD2_SECURITY_LABEL
+#endif
};
static const u32 nfs4_pnfs_open_bitmap[3] = {
@@ -157,7 +217,7 @@ static const u32 nfs4_open_noattr_bitmap[3] = {
| FATTR4_WORD0_FILEID,
};
-const u32 nfs4_statfs_bitmap[2] = {
+const u32 nfs4_statfs_bitmap[3] = {
FATTR4_WORD0_FILES_AVAIL
| FATTR4_WORD0_FILES_FREE
| FATTR4_WORD0_FILES_TOTAL,
@@ -166,7 +226,7 @@ const u32 nfs4_statfs_bitmap[2] = {
| FATTR4_WORD1_SPACE_TOTAL
};
-const u32 nfs4_pathconf_bitmap[2] = {
+const u32 nfs4_pathconf_bitmap[3] = {
FATTR4_WORD0_MAXLINK
| FATTR4_WORD0_MAXNAME,
0
@@ -181,7 +241,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
FATTR4_WORD2_LAYOUT_BLKSIZE
};
-const u32 nfs4_fs_locations_bitmap[2] = {
+const u32 nfs4_fs_locations_bitmap[3] = {
FATTR4_WORD0_TYPE
| FATTR4_WORD0_CHANGE
| FATTR4_WORD0_SIZE
@@ -197,7 +257,7 @@ const u32 nfs4_fs_locations_bitmap[2] = {
| FATTR4_WORD1_TIME_ACCESS
| FATTR4_WORD1_TIME_METADATA
| FATTR4_WORD1_TIME_MODIFY
- | FATTR4_WORD1_MOUNTED_ON_FILEID
+ | FATTR4_WORD1_MOUNTED_ON_FILEID,
};
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -264,7 +324,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- freezable_schedule_timeout_killable(*timeout);
+ freezable_schedule_timeout_killable_unsafe(*timeout);
if (fatal_signal_pending(current))
res = -ERESTARTSYS;
*timeout <<= 1;
@@ -293,23 +353,42 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
}
if (state == NULL)
break;
- nfs4_schedule_stateid_recovery(server, state);
+ ret = nfs4_schedule_stateid_recovery(server, state);
+ if (ret < 0)
+ break;
goto wait_on_recovery;
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) {
+ nfs_remove_bad_delegation(inode);
+ exception->retry = 1;
+ break;
+ }
if (state == NULL)
break;
- nfs_remove_bad_delegation(state->inode);
- nfs4_schedule_stateid_recovery(server, state);
+ ret = nfs4_schedule_stateid_recovery(server, state);
+ if (ret < 0)
+ break;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
- if (state != NULL)
- nfs4_schedule_stateid_recovery(server, state);
+ if (state != NULL) {
+ ret = nfs4_schedule_stateid_recovery(server, state);
+ if (ret < 0)
+ break;
+ }
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
+ case -NFS4ERR_MOVED:
+ ret = nfs4_schedule_migration_recovery(server);
+ if (ret < 0)
+ break;
+ goto wait_on_recovery;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -357,11 +436,27 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
return nfs4_map_errors(ret);
wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ return -EIO;
if (ret == 0)
exception->retry = 1;
return ret;
}
+/*
+ * Return 'true' if 'clp' is using an rpc_client that is integrity protected
+ * or 'false' otherwise.
+ */
+static bool _nfs4_is_integrity_protected(struct nfs_client *clp)
+{
+ rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor;
+
+ if (flavor == RPC_AUTH_GSS_KRB5I ||
+ flavor == RPC_AUTH_GSS_KRB5P)
+ return true;
+
+ return false;
+}
static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
{
@@ -376,21 +471,98 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
do_renew_lease(server->nfs_client, timestamp);
}
+struct nfs4_call_sync_data {
+ const struct nfs_server *seq_server;
+ struct nfs4_sequence_args *seq_args;
+ struct nfs4_sequence_res *seq_res;
+};
+
+static void nfs4_init_sequence(struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res, int cache_reply)
+{
+ args->sa_slot = NULL;
+ args->sa_cache_this = cache_reply;
+ args->sa_privileged = 0;
+
+ res->sr_slot = NULL;
+}
+
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+ args->sa_privileged = 1;
+}
+
+static int nfs40_setup_sequence(const struct nfs_server *server,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct rpc_task *task)
+{
+ struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl;
+ struct nfs4_slot *slot;
+
+ /* slot already allocated? */
+ if (res->sr_slot != NULL)
+ goto out_start;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+ goto out_sleep;
+
+ slot = nfs4_alloc_slot(tbl);
+ if (IS_ERR(slot)) {
+ if (slot == ERR_PTR(-ENOMEM))
+ task->tk_timeout = HZ >> 2;
+ goto out_sleep;
+ }
+ spin_unlock(&tbl->slot_tbl_lock);
+
+ args->sa_slot = slot;
+ res->sr_slot = slot;
+
+out_start:
+ rpc_call_start(task);
+ return 0;
+
+out_sleep:
+ if (args->sa_privileged)
+ rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
+ NULL, RPC_PRIORITY_PRIVILEGED);
+ else
+ rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+ spin_unlock(&tbl->slot_tbl_lock);
+ return -EAGAIN;
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
+{
+ struct nfs4_slot *slot = res->sr_slot;
+ struct nfs4_slot_table *tbl;
+
+ if (slot == NULL)
+ goto out;
+
+ tbl = slot->table;
+ spin_lock(&tbl->slot_tbl_lock);
+ if (!nfs41_wake_and_assign_slot(tbl, slot))
+ nfs4_free_slot(tbl, slot);
+ spin_unlock(&tbl->slot_tbl_lock);
+
+ res->sr_slot = NULL;
+out:
+ return 1;
+}
+
#if defined(CONFIG_NFS_V4_1)
static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
+ struct nfs4_slot *slot = res->sr_slot;
bool send_new_highest_used_slotid = false;
- if (!res->sr_slot) {
- /* just wake up the next guy waiting since
- * we may have not consumed a slot after all */
- dprintk("%s: No slot\n", __func__);
- return;
- }
- tbl = res->sr_slot->table;
+ tbl = slot->table;
session = tbl->session;
spin_lock(&tbl->slot_tbl_lock);
@@ -400,11 +572,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
if (tbl->highest_used_slotid > tbl->target_highest_slotid)
send_new_highest_used_slotid = true;
- if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) {
+ if (nfs41_wake_and_assign_slot(tbl, slot)) {
send_new_highest_used_slotid = false;
goto out_unlock;
}
- nfs4_free_slot(tbl, res->sr_slot);
+ nfs4_free_slot(tbl, slot);
if (tbl->highest_used_slotid != NFS4_NO_SLOT)
send_new_highest_used_slotid = false;
@@ -415,19 +587,20 @@ out_unlock:
nfs41_server_notify_highest_slotid_update(session->clp);
}
-static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
{
struct nfs4_session *session;
- struct nfs4_slot *slot;
+ struct nfs4_slot *slot = res->sr_slot;
struct nfs_client *clp;
bool interrupted = false;
int ret = 1;
+ if (slot == NULL)
+ goto out_noaction;
/* don't increment the sequence number if the task wasn't sent */
if (!RPC_WAS_SENT(task))
goto out;
- slot = res->sr_slot;
session = slot->table->session;
if (slot->interrupted) {
@@ -435,6 +608,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
interrupted = true;
}
+ trace_nfs4_sequence_done(session, res);
/* Check the SEQUENCE operation status */
switch (res->sr_status) {
case 0:
@@ -501,6 +675,7 @@ out:
/* The session may be reset by one of the error handlers. */
dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
nfs41_sequence_free_slot(res);
+out_noaction:
return ret;
retry_nowait:
if (rpc_restart_call_prepare(task)) {
@@ -514,31 +689,18 @@ out_retry:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return 0;
}
+EXPORT_SYMBOL_GPL(nfs41_sequence_done);
static int nfs4_sequence_done(struct rpc_task *task,
struct nfs4_sequence_res *res)
{
if (res->sr_slot == NULL)
return 1;
+ if (!res->sr_slot->table->session)
+ return nfs40_sequence_done(task, res);
return nfs41_sequence_done(task, res);
}
-static void nfs41_init_sequence(struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res, int cache_reply)
-{
- args->sa_slot = NULL;
- args->sa_cache_this = 0;
- args->sa_privileged = 0;
- if (cache_reply)
- args->sa_cache_this = 1;
- res->sr_slot = NULL;
-}
-
-static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
-{
- args->sa_privileged = 1;
-}
-
int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
@@ -557,7 +719,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
task->tk_timeout = 0;
spin_lock(&tbl->slot_tbl_lock);
- if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) &&
!args->sa_privileged) {
/* The state manager will wait until the slot table is empty */
dprintk("%s session is draining\n", __func__);
@@ -576,7 +738,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
args->sa_slot = slot;
- dprintk("<-- %s slotid=%d seqid=%d\n", __func__,
+ dprintk("<-- %s slotid=%u seqid=%u\n", __func__,
slot->slot_nr, slot->seq_nr);
res->sr_slot = slot;
@@ -587,6 +749,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
* set to 1 if an rpc level failure occurs.
*/
res->sr_status = 1;
+ trace_nfs4_setup_sequence(session, args);
out_success:
rpc_call_start(task);
return 0;
@@ -602,38 +765,30 @@ out_sleep:
}
EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
-int nfs4_setup_sequence(const struct nfs_server *server,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res,
- struct rpc_task *task)
+static int nfs4_setup_sequence(const struct nfs_server *server,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct rpc_task *task)
{
struct nfs4_session *session = nfs4_get_session(server);
int ret = 0;
- if (session == NULL) {
- rpc_call_start(task);
- goto out;
- }
+ if (!session)
+ return nfs40_setup_sequence(server, args, res, task);
- dprintk("--> %s clp %p session %p sr_slot %d\n",
+ dprintk("--> %s clp %p session %p sr_slot %u\n",
__func__, session->clp, session, res->sr_slot ?
- res->sr_slot->slot_nr : -1);
+ res->sr_slot->slot_nr : NFS4_NO_SLOT);
ret = nfs41_setup_sequence(session, args, res, task);
-out:
+
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
-struct nfs41_call_sync_data {
- const struct nfs_server *seq_server;
- struct nfs4_sequence_args *seq_args;
- struct nfs4_sequence_res *seq_res;
-};
-
static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs41_call_sync_data *data = calldata;
+ struct nfs4_call_sync_data *data = calldata;
struct nfs4_session *session = nfs4_get_session(data->seq_server);
dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
@@ -643,7 +798,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
{
- struct nfs41_call_sync_data *data = calldata;
+ struct nfs4_call_sync_data *data = calldata;
nfs41_sequence_done(task, data->seq_res);
}
@@ -653,6 +808,42 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_done = nfs41_call_sync_done,
};
+#else /* !CONFIG_NFS_V4_1 */
+
+static int nfs4_setup_sequence(const struct nfs_server *server,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ struct rpc_task *task)
+{
+ return nfs40_setup_sequence(server, args, res, task);
+}
+
+static int nfs4_sequence_done(struct rpc_task *task,
+ struct nfs4_sequence_res *res)
+{
+ return nfs40_sequence_done(task, res);
+}
+
+#endif /* !CONFIG_NFS_V4_1 */
+
+static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs4_call_sync_data *data = calldata;
+ nfs4_setup_sequence(data->seq_server,
+ data->seq_args, data->seq_res, task);
+}
+
+static void nfs40_call_sync_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs4_call_sync_data *data = calldata;
+ nfs4_sequence_done(task, data->seq_res);
+}
+
+static const struct rpc_call_ops nfs40_call_sync_ops = {
+ .rpc_call_prepare = nfs40_call_sync_prepare,
+ .rpc_call_done = nfs40_call_sync_done,
+};
+
static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
struct nfs_server *server,
struct rpc_message *msg,
@@ -661,7 +852,8 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
{
int ret;
struct rpc_task *task;
- struct nfs41_call_sync_data data = {
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_call_sync_data data = {
.seq_server = server,
.seq_args = args,
.seq_res = res,
@@ -669,7 +861,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
struct rpc_task_setup task_setup = {
.rpc_client = clnt,
.rpc_message = msg,
- .callback_ops = &nfs41_call_sync_ops,
+ .callback_ops = clp->cl_mvops->call_sync_ops,
.callback_data = &data
};
@@ -683,35 +875,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
return ret;
}
-#else
-static
-void nfs41_init_sequence(struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res, int cache_reply)
-{
-}
-
-static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
-{
-}
-
-
-static int nfs4_sequence_done(struct rpc_task *task,
- struct nfs4_sequence_res *res)
-{
- return 1;
-}
-#endif /* CONFIG_NFS_V4_1 */
-
-static
-int _nfs4_call_sync(struct rpc_clnt *clnt,
- struct nfs_server *server,
- struct rpc_message *msg,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res)
-{
- return rpc_call_sync(clnt, msg, 0);
-}
-
static
int nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs_server *server,
@@ -720,9 +883,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs4_sequence_res *res,
int cache_reply)
{
- nfs41_init_sequence(args, res, cache_reply);
- return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
- args, res);
+ nfs4_init_sequence(args, res, cache_reply);
+ return nfs4_call_sync_sequence(clnt, server, msg, args, res);
}
static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
@@ -747,6 +909,7 @@ struct nfs4_opendata {
struct nfs4_string owner_name;
struct nfs4_string group_name;
struct nfs_fattr f_attr;
+ struct nfs4_label *f_label;
struct dentry *dir;
struct dentry *dentry;
struct nfs4_state_owner *owner;
@@ -754,14 +917,46 @@ struct nfs4_opendata {
struct iattr attrs;
unsigned long timestamp;
unsigned int rpc_done : 1;
+ unsigned int file_created : 1;
+ unsigned int is_recover : 1;
int rpc_status;
int cancelled;
};
+static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
+ int err, struct nfs4_exception *exception)
+{
+ if (err != -EINVAL)
+ return false;
+ if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
+ return false;
+ server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1;
+ exception->retry = 1;
+ return true;
+}
+
+static enum open_claim_type4
+nfs4_map_atomic_open_claim(struct nfs_server *server,
+ enum open_claim_type4 claim)
+{
+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ return claim;
+ switch (claim) {
+ default:
+ return claim;
+ case NFS4_OPEN_CLAIM_FH:
+ return NFS4_OPEN_CLAIM_NULL;
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ return NFS4_OPEN_CLAIM_DELEGATE_CUR;
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ return NFS4_OPEN_CLAIM_DELEGATE_PREV;
+ }
+}
static void nfs4_init_opendata_res(struct nfs4_opendata *p)
{
p->o_res.f_attr = &p->f_attr;
+ p->o_res.f_label = p->f_label;
p->o_res.seqid = p->o_arg.seqid;
p->c_res.seqid = p->c_arg.seqid;
p->o_res.server = p->o_arg.server;
@@ -773,6 +968,8 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct nfs4_state_owner *sp, fmode_t fmode, int flags,
const struct iattr *attrs,
+ struct nfs4_label *label,
+ enum open_claim_type4 claim,
gfp_t gfp_mask)
{
struct dentry *parent = dget_parent(dentry);
@@ -783,15 +980,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p = kzalloc(sizeof(*p), gfp_mask);
if (p == NULL)
goto err;
+
+ p->f_label = nfs4_label_alloc(server, gfp_mask);
+ if (IS_ERR(p->f_label))
+ goto err_free_p;
+
p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
if (p->o_arg.seqid == NULL)
- goto err_free;
+ goto err_free_label;
nfs_sb_active(dentry->d_sb);
p->dentry = dget(dentry);
p->dir = parent;
p->owner = sp;
atomic_inc(&sp->so_count);
- p->o_arg.fh = NFS_FH(dir);
p->o_arg.open_flags = flags;
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
/* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
@@ -807,11 +1008,24 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
- p->o_arg.bitmask = server->attr_bitmask;
+ p->o_arg.bitmask = nfs4_bitmask(server, label);
p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
- p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+ p->o_arg.label = label;
+ p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
+ switch (p->o_arg.claim) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ p->o_arg.fh = NFS_FH(dir);
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ case NFS4_OPEN_CLAIM_FH:
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ p->o_arg.fh = NFS_FH(dentry->d_inode);
+ }
if (attrs != NULL && attrs->ia_valid != 0) {
- __be32 verf[2];
+ __u32 verf[2];
p->o_arg.u.attrs = &p->attrs;
memcpy(&p->attrs, attrs, sizeof(p->attrs));
@@ -827,7 +1041,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
nfs4_init_opendata_res(p);
kref_init(&p->kref);
return p;
-err_free:
+
+err_free_label:
+ nfs4_label_free(p->f_label);
+err_free_p:
kfree(p);
err:
dput(parent);
@@ -844,10 +1061,14 @@ static void nfs4_opendata_free(struct kref *kref)
if (p->state != NULL)
nfs4_put_open_state(p->state);
nfs4_put_state_owner(p->owner);
+
+ nfs4_label_free(p->f_label);
+
dput(p->dir);
dput(p->dentry);
nfs_sb_deactive(sb);
nfs_fattr_free_names(&p->f_attr);
+ kfree(p->f_attr.mdsthreshold);
kfree(p);
}
@@ -896,6 +1117,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
return 0;
if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
return 0;
+ if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ return 0;
nfs_mark_delegation_referenced(delegation);
return 1;
}
@@ -915,11 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
nfs4_state_set_mode_locked(state, state->state | fmode);
}
-static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
{
+ struct nfs_client *clp = state->owner->so_server->nfs_client;
+ bool need_recover = false;
+
+ if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
+ need_recover = true;
+ if (need_recover)
+ nfs4_state_mark_reclaim_nograce(clp, state);
+}
+
+static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+ nfs4_stateid *stateid)
+{
+ if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ return true;
+ if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ nfs_test_and_clear_all_open_stateid(state);
+ return true;
+ }
+ if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
+ return true;
+ return false;
+}
+
+static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
+ nfs4_stateid *stateid, fmode_t fmode)
+{
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ switch (fmode & (FMODE_READ|FMODE_WRITE)) {
+ case FMODE_WRITE:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ:
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case 0:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
+ }
+ if (stateid == NULL)
+ return;
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
+}
+
+static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+{
+ write_seqlock(&state->seqlock);
+ nfs_clear_open_stateid_locked(state, stateid, fmode);
+ write_sequnlock(&state->seqlock);
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
+}
+
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+{
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -930,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
case FMODE_READ|FMODE_WRITE:
set_bit(NFS_O_RDWR_STATE, &state->flags);
}
-}
-
-static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
-{
- write_seqlock(&state->seqlock);
- nfs_set_open_stateid_locked(state, stateid, fmode);
- write_sequnlock(&state->seqlock);
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
}
static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
@@ -972,7 +1253,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
goto no_delegation;
spin_lock(&deleg_cur->lock);
- if (nfsi->delegation != deleg_cur ||
+ if (rcu_dereference(nfsi->delegation) != deleg_cur ||
+ test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
(deleg_cur->type & fmode) != fmode)
goto no_delegation_unlock;
@@ -993,6 +1275,8 @@ no_delegation:
__update_open_stateid(state, open_stateid, NULL, fmode);
ret = 1;
}
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
return ret;
}
@@ -1017,7 +1301,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
struct nfs4_state *state = opendata->state;
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_delegation *delegation;
- int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
+ int open_mode = opendata->o_arg.open_flags;
fmode_t fmode = opendata->o_arg.fmode;
nfs4_stateid stateid;
int ret = -EAGAIN;
@@ -1041,9 +1325,12 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
/* Save the delegation */
nfs4_stateid_copy(&stateid, &delegation->stateid);
rcu_read_unlock();
- ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
- if (ret != 0)
- goto out;
+ nfs_release_seqid(opendata->o_arg.seqid);
+ if (!opendata->is_recover) {
+ ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ if (ret != 0)
+ goto out;
+ }
ret = -EAGAIN;
/* Try to update the stateid using the delegation */
@@ -1096,29 +1383,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- ret = data->rpc_status;
- goto err;
+ if (data->rpc_status) {
+ ret = data->rpc_status;
+ goto err;
+ }
+ /* cached opens have already been processed */
+ goto update;
}
- ret = -ESTALE;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
- goto err;
-
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err;
-
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
+update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
+ atomic_inc(&state->count);
return state;
err:
@@ -1141,7 +1423,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
ret = -EAGAIN;
if (!(data->f_attr.valid & NFS_ATTR_FATTR))
goto err;
- inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+ inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label);
ret = PTR_ERR(inode);
if (IS_ERR(inode))
goto err;
@@ -1155,6 +1437,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
data->o_arg.fmode);
iput(inode);
out:
+ nfs_release_seqid(data->o_arg.seqid);
return state;
err_put_inode:
iput(inode);
@@ -1187,11 +1470,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
return ERR_PTR(-ENOENT);
}
-static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state)
+static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
+ struct nfs4_state *state, enum open_claim_type4 claim)
{
struct nfs4_opendata *opendata;
- opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
+ opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
+ NULL, NULL, claim, GFP_NOFS);
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
@@ -1225,11 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
struct nfs4_state *newstate;
int ret;
+ /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
/* memory barrier prior to reading state->n_* */
clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
smp_rmb();
if (state->n_rdwr != 0) {
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1237,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_wronly != 0) {
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1245,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_rdonly != 0) {
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
if (ret != 0)
return ret;
@@ -1277,11 +1564,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
fmode_t delegation_type = 0;
int status;
- opendata = nfs4_open_recoverdata_alloc(ctx, state);
+ opendata = nfs4_open_recoverdata_alloc(ctx, state,
+ NFS4_OPEN_CLAIM_PREVIOUS);
if (IS_ERR(opendata))
return PTR_ERR(opendata);
- opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
- opendata->o_arg.fh = NFS_FH(state->inode);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
@@ -1300,6 +1586,9 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
int err;
do {
err = _nfs4_do_open_reclaim(ctx, state);
+ trace_nfs4_open_reclaim(ctx, 0, err);
+ if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+ continue;
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -1314,77 +1603,94 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
ctx = nfs4_state_find_open_context(state);
if (IS_ERR(ctx))
- return PTR_ERR(ctx);
+ return -EAGAIN;
ret = nfs4_do_open_reclaim(ctx, state);
put_nfs_open_context(ctx);
return ret;
}
-static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
+{
+ switch (err) {
+ default:
+ printk(KERN_ERR "NFS: %s: unhandled error "
+ "%d.\n", __func__, err);
+ case 0:
+ case -ENOENT:
+ case -ESTALE:
+ break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ case -NFS4ERR_DEADSESSION:
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+ return -EAGAIN;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_STALE_STATEID:
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ case -NFS4ERR_EXPIRED:
+ /* Don't recall a delegation if it was lost */
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ return -EAGAIN;
+ case -NFS4ERR_MOVED:
+ nfs4_schedule_migration_recovery(server);
+ return -EAGAIN;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(server->nfs_client);
+ return -EAGAIN;
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OPENMODE:
+ nfs_inode_find_state_and_recover(state->inode,
+ stateid);
+ nfs4_schedule_stateid_recovery(server, state);
+ return 0;
+ case -NFS4ERR_DELAY:
+ case -NFS4ERR_GRACE:
+ set_bit(NFS_DELEGATED_STATE, &state->flags);
+ ssleep(1);
+ return -EAGAIN;
+ case -ENOMEM:
+ case -NFS4ERR_DENIED:
+ /* kill_proc(fl->fl_pid, SIGLOST, 1); */
+ return 0;
+ }
+ return err;
+}
+
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
+ struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_opendata *opendata;
- int ret;
+ int err;
- opendata = nfs4_open_recoverdata_alloc(ctx, state);
+ opendata = nfs4_open_recoverdata_alloc(ctx, state,
+ NFS4_OPEN_CLAIM_DELEG_CUR_FH);
if (IS_ERR(opendata))
return PTR_ERR(opendata);
- opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
- ret = nfs4_open_recover(opendata, state);
+ err = nfs4_open_recover(opendata, state);
nfs4_opendata_put(opendata);
- return ret;
+ return nfs4_handle_delegation_recall_error(server, state, stateid, err);
}
-int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs4_exception exception = { };
- struct nfs_server *server = NFS_SERVER(state->inode);
- int err;
- do {
- err = _nfs4_open_delegation_recall(ctx, state, stateid);
- switch (err) {
- case 0:
- case -ENOENT:
- case -ESTALE:
- goto out;
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
- goto out;
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
- /* Don't recall a delegation if it was lost */
- nfs4_schedule_lease_recovery(server->nfs_client);
- goto out;
- case -ERESTARTSYS:
- /*
- * The show must go on: exit, but mark the
- * stateid as needing recovery.
- */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- nfs_inode_find_state_and_recover(state->inode,
- stateid);
- nfs4_schedule_stateid_recovery(server, state);
- case -ENOMEM:
- err = 0;
- goto out;
- }
- err = nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
-out:
- return err;
+ struct nfs4_opendata *data = calldata;
+
+ nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args,
+ &data->c_res.seq_res, task);
}
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
+ nfs40_sequence_done(task, &data->c_res.seq_res);
+
data->rpc_status = task->tk_status;
if (data->rpc_status == 0) {
nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
@@ -1413,6 +1719,7 @@ out_free:
}
static const struct rpc_call_ops nfs4_open_confirm_ops = {
+ .rpc_call_prepare = nfs4_open_confirm_prepare,
.rpc_call_done = nfs4_open_confirm_done,
.rpc_release = nfs4_open_confirm_release,
};
@@ -1440,6 +1747,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
};
int status;
+ nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
@@ -1461,9 +1769,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
struct nfs4_state_owner *sp = data->owner;
+ struct nfs_client *clp = sp->so_server->nfs_client;
if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
- return;
+ goto out_wait;
/*
* Check if we still need to send an OPEN call, or if we can use
* a delegation instead.
@@ -1476,15 +1785,20 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
rcu_read_lock();
delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR &&
+ data->o_arg.claim != NFS4_OPEN_CLAIM_DELEG_CUR_FH &&
can_open_delegated(delegation, data->o_arg.fmode))
goto unlock_no_action;
rcu_read_unlock();
}
/* Update client id. */
- data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
- if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
- task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ data->o_arg.clientid = clp->cl_clientid;
+ switch (data->o_arg.claim) {
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
+ case NFS4_OPEN_CLAIM_FH:
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
}
data->timestamp = jiffies;
@@ -1493,11 +1807,22 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
&data->o_res.seq_res,
task) != 0)
nfs_release_seqid(data->o_arg.seqid);
+
+ /* Set the create mode (note dependency on the session type) */
+ data->o_arg.createmode = NFS4_CREATE_UNCHECKED;
+ if (data->o_arg.open_flags & O_EXCL) {
+ data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE;
+ if (nfs4_has_persistent_session(clp))
+ data->o_arg.createmode = NFS4_CREATE_GUARDED;
+ else if (clp->cl_mvops->minor_version > 0)
+ data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1;
+ }
return;
unlock_no_action:
rcu_read_unlock();
out_no_action:
task->tk_action = NULL;
+out_wait:
nfs4_sequence_done(task, &data->o_res.seq_res);
}
@@ -1582,13 +1907,16 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
};
int status;
- nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
+ nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
data->cancelled = 0;
- if (isrecover)
+ data->is_recover = 0;
+ if (isrecover) {
nfs4_set_sequence_privileged(&o_arg->seq_args);
+ data->is_recover = 1;
+ }
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1684,8 +2012,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
nfs_fattr_map_and_free_names(server, &data->f_attr);
- if (o_arg->open_flags & O_CREAT)
+ if (o_arg->open_flags & O_CREAT) {
update_changeattr(dir, &o_res->cinfo);
+ if (o_arg->open_flags & O_EXCL)
+ data->file_created = 1;
+ else if (o_res->cinfo.before != o_res->cinfo.after)
+ data->file_created = 1;
+ }
if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
server->caps &= ~NFS_CAP_POSIX_LOCK;
if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1694,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return status;
}
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
- _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr);
+ nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
return 0;
}
@@ -1713,7 +2046,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
struct nfs4_opendata *opendata;
int ret;
- opendata = nfs4_open_recoverdata_alloc(ctx, state);
+ opendata = nfs4_open_recoverdata_alloc(ctx, state,
+ NFS4_OPEN_CLAIM_FH);
if (IS_ERR(opendata))
return PTR_ERR(opendata);
ret = nfs4_open_recover(opendata, state);
@@ -1731,6 +2065,9 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
do {
err = _nfs4_open_expired(ctx, state);
+ trace_nfs4_open_expired(ctx, 0, err);
+ if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+ continue;
switch (err) {
default:
goto out;
@@ -1751,7 +2088,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
ctx = nfs4_state_find_open_context(state);
if (IS_ERR(ctx))
- return PTR_ERR(ctx);
+ return -EAGAIN;
ret = nfs4_do_open_expired(ctx, state);
put_nfs_open_context(ctx);
return ret;
@@ -1762,18 +2099,31 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
nfs4_stateid *stateid = &state->stateid;
- int status;
+ struct nfs_delegation *delegation;
+ struct rpc_cred *cred = NULL;
+ int status = -NFS4ERR_BAD_STATEID;
/* If a state reset has been done, test_stateid is unneeded */
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
return;
- status = nfs41_test_stateid(server, stateid);
+ /* Get the delegation credential for use by test/free_stateid */
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+ if (delegation != NULL &&
+ nfs4_stateid_match(&delegation->stateid, stateid)) {
+ cred = get_rpccred(delegation->cred);
+ rcu_read_unlock();
+ status = nfs41_test_stateid(server, stateid, cred);
+ trace_nfs4_test_delegation_stateid(state, NULL, status);
+ } else
+ rcu_read_unlock();
+
if (status != NFS_OK) {
/* Free the stateid unless the server explicitly
* informs us the stateid is unrecognized. */
if (status != -NFS4ERR_BAD_STATEID)
- nfs41_free_stateid(server, stateid);
+ nfs41_free_stateid(server, stateid, cred);
nfs_remove_bad_delegation(state->inode);
write_seqlock(&state->seqlock);
@@ -1781,6 +2131,9 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
write_sequnlock(&state->seqlock);
clear_bit(NFS_DELEGATED_STATE, &state->flags);
}
+
+ if (cred != NULL)
+ put_rpccred(cred);
}
/**
@@ -1795,6 +2148,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
nfs4_stateid *stateid = &state->open_stateid;
+ struct rpc_cred *cred = state->owner->so_cred;
int status;
/* If a state reset has been done, test_stateid is unneeded */
@@ -1803,16 +2157,18 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
(test_bit(NFS_O_RDWR_STATE, &state->flags) == 0))
return -NFS4ERR_BAD_STATEID;
- status = nfs41_test_stateid(server, stateid);
+ status = nfs41_test_stateid(server, stateid, cred);
+ trace_nfs4_test_open_stateid(state, NULL, status);
if (status != NFS_OK) {
/* Free the stateid unless the server explicitly
* informs us the stateid is unrecognized. */
if (status != -NFS4ERR_BAD_STATEID)
- nfs41_free_stateid(server, stateid);
+ nfs41_free_stateid(server, stateid, cred);
clear_bit(NFS_O_RDONLY_STATE, &state->flags);
clear_bit(NFS_O_WRONLY_STATE, &state->flags);
clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
}
return status;
}
@@ -1845,22 +2201,80 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
sattr->ia_valid |= ATTR_MTIME;
}
+static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
+ fmode_t fmode,
+ int flags,
+ struct nfs_open_context *ctx)
+{
+ struct nfs4_state_owner *sp = opendata->owner;
+ struct nfs_server *server = sp->so_server;
+ struct dentry *dentry;
+ struct nfs4_state *state;
+ unsigned int seq;
+ int ret;
+
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+
+ ret = _nfs4_proc_open(opendata);
+ if (ret != 0)
+ goto out;
+
+ state = nfs4_opendata_to_nfs4_state(opendata);
+ ret = PTR_ERR(state);
+ if (IS_ERR(state))
+ goto out;
+ if (server->caps & NFS_CAP_POSIX_LOCK)
+ set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
+
+ dentry = opendata->dentry;
+ if (dentry->d_inode == NULL) {
+ /* FIXME: Is this d_drop() ever needed? */
+ d_drop(dentry);
+ dentry = d_add_unique(dentry, igrab(state->inode));
+ if (dentry == NULL) {
+ dentry = opendata->dentry;
+ } else if (dentry != ctx->dentry) {
+ dput(ctx->dentry);
+ ctx->dentry = dget(dentry);
+ }
+ nfs_set_verifier(dentry,
+ nfs_save_change_attribute(opendata->dir->d_inode));
+ }
+
+ ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
+ if (ret != 0)
+ goto out;
+
+ ctx->state = state;
+ if (dentry->d_inode == state->inode) {
+ nfs_inode_attach_open_context(ctx);
+ if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
+ nfs4_schedule_stateid_recovery(server, state);
+ }
+out:
+ return ret;
+}
+
/*
* Returns a referenced nfs4_state
*/
static int _nfs4_do_open(struct inode *dir,
- struct dentry *dentry,
- fmode_t fmode,
+ struct nfs_open_context *ctx,
int flags,
struct iattr *sattr,
- struct rpc_cred *cred,
- struct nfs4_state **res,
- struct nfs4_threshold **ctx_th)
+ struct nfs4_label *label,
+ int *opened)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_opendata *opendata;
+ struct dentry *dentry = ctx->dentry;
+ struct rpc_cred *cred = ctx->cred;
+ struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
+ fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
+ enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
+ struct nfs4_label *olabel = NULL;
int status;
/* Protect against reboot recovery conflicts */
@@ -1876,83 +2290,91 @@ static int _nfs4_do_open(struct inode *dir,
if (dentry->d_inode != NULL)
nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
status = -ENOMEM;
- opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL);
+ if (dentry->d_inode)
+ claim = NFS4_OPEN_CLAIM_FH;
+ opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
+ label, claim, GFP_KERNEL);
if (opendata == NULL)
goto err_put_state_owner;
- if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
- opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
- if (!opendata->f_attr.mdsthreshold)
+ if (label) {
+ olabel = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(olabel)) {
+ status = PTR_ERR(olabel);
goto err_opendata_put;
+ }
+ }
+
+ if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+ if (!opendata->f_attr.mdsthreshold) {
+ opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
+ if (!opendata->f_attr.mdsthreshold)
+ goto err_free_label;
+ }
opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
}
if (dentry->d_inode != NULL)
opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
- status = _nfs4_proc_open(opendata);
- if (status != 0)
- goto err_opendata_put;
-
- state = nfs4_opendata_to_nfs4_state(opendata);
- status = PTR_ERR(state);
- if (IS_ERR(state))
- goto err_opendata_put;
- if (server->caps & NFS_CAP_POSIX_LOCK)
- set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
-
- status = nfs4_opendata_access(cred, opendata, state, fmode, flags);
+ status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx);
if (status != 0)
- goto err_opendata_put;
+ goto err_free_label;
+ state = ctx->state;
- if (opendata->o_arg.open_flags & O_EXCL) {
+ if ((opendata->o_arg.open_flags & O_EXCL) &&
+ (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
nfs4_exclusive_attrset(opendata, sattr);
nfs_fattr_init(opendata->o_res.f_attr);
status = nfs4_do_setattr(state->inode, cred,
opendata->o_res.f_attr, sattr,
- state);
- if (status == 0)
+ state, label, olabel);
+ if (status == 0) {
nfs_setattr_update_inode(state->inode, sattr);
- nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+ nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+ nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
+ }
}
+ if (opendata->file_created)
+ *opened |= FILE_CREATED;
- if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
+ if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
*ctx_th = opendata->f_attr.mdsthreshold;
- else
- kfree(opendata->f_attr.mdsthreshold);
- opendata->f_attr.mdsthreshold = NULL;
+ opendata->f_attr.mdsthreshold = NULL;
+ }
+
+ nfs4_label_free(olabel);
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
- *res = state;
return 0;
+err_free_label:
+ nfs4_label_free(olabel);
err_opendata_put:
- kfree(opendata->f_attr.mdsthreshold);
nfs4_opendata_put(opendata);
err_put_state_owner:
nfs4_put_state_owner(sp);
out_err:
- *res = NULL;
return status;
}
static struct nfs4_state *nfs4_do_open(struct inode *dir,
- struct dentry *dentry,
- fmode_t fmode,
+ struct nfs_open_context *ctx,
int flags,
struct iattr *sattr,
- struct rpc_cred *cred,
- struct nfs4_threshold **ctx_th)
+ struct nfs4_label *label,
+ int *opened)
{
+ struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
- fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC;
do {
- status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
- &res, ctx_th);
+ status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
+ res = ctx->state;
+ trace_nfs4_open_file(ctx, flags, status);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1988,7 +2410,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
exception.retry = 1;
continue;
}
- res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ if (nfs4_clear_cap_atomic_open_v1(server, status, &exception))
+ continue;
+ res = ERR_PTR(nfs4_handle_exception(server,
status, &exception));
} while (exception.retry);
return res;
@@ -1996,7 +2420,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
- struct nfs4_state *state)
+ struct nfs4_state *state, struct nfs4_label *ilabel,
+ struct nfs4_label *olabel)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_setattrargs arg = {
@@ -2004,9 +2429,11 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
.iap = sattr,
.server = server,
.bitmask = server->attr_bitmask,
+ .label = ilabel,
};
struct nfs_setattrres res = {
.fattr = fattr,
+ .label = olabel,
.server = server,
};
struct rpc_message msg = {
@@ -2016,20 +2443,32 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
.rpc_cred = cred,
};
unsigned long timestamp = jiffies;
+ fmode_t fmode;
+ bool truncate;
int status;
+ arg.bitmask = nfs4_bitmask(server, ilabel);
+ if (ilabel)
+ arg.bitmask = nfs4_bitmask(server, olabel);
+
nfs_fattr_init(fattr);
- if (state != NULL) {
+ /* Servers should only apply open mode checks for file size changes */
+ truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false;
+ fmode = truncate ? FMODE_WRITE : FMODE_READ;
+
+ if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) {
+ /* Use that stateid */
+ } else if (truncate && state != NULL) {
struct nfs_lockowner lockowner = {
.l_owner = current->files,
.l_pid = current->tgid,
};
- nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
- &lockowner);
- } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
- FMODE_WRITE)) {
- /* Use that stateid */
+ if (!nfs4_valid_open_stateid(state))
+ return -EBADF;
+ if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
+ &lockowner) == -EIO)
+ return -EBADF;
} else
nfs4_stateid_copy(&arg.stateid, &zero_stateid);
@@ -2041,7 +2480,8 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
- struct nfs4_state *state)
+ struct nfs4_state *state, struct nfs4_label *ilabel,
+ struct nfs4_label *olabel)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = {
@@ -2050,9 +2490,17 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
};
int err;
do {
- err = _nfs4_do_setattr(inode, cred, fattr, sattr, state);
+ err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
+ trace_nfs4_setattr(inode, err);
switch (err) {
case -NFS4ERR_OPENMODE:
+ if (!(sattr->ia_valid & ATTR_SIZE)) {
+ pr_warn_once("NFSv4: server %s is incorrectly "
+ "applying open mode checks to "
+ "a SETATTR that is not "
+ "changing file size.\n",
+ server->nfs_client->cl_hostname);
+ }
if (state && !(state->state & FMODE_WRITE)) {
err = -EBADF;
if (sattr->ia_valid & ATTR_OPEN)
@@ -2088,22 +2536,10 @@ static void nfs4_free_closedata(void *data)
nfs4_put_open_state(calldata->state);
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
- nfs_sb_deactive_async(sb);
+ nfs_sb_deactive(sb);
kfree(calldata);
}
-static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
- fmode_t fmode)
-{
- spin_lock(&state->owner->so_lock);
- if (!(fmode & FMODE_READ))
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- if (!(fmode & FMODE_WRITE))
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- spin_unlock(&state->owner->so_lock);
-}
-
static void nfs4_close_done(struct rpc_task *task, void *data)
{
struct nfs4_closedata *calldata = data;
@@ -2113,6 +2549,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return;
+ trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
@@ -2121,11 +2558,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->roc)
pnfs_roc_set_barrier(state->inode,
calldata->roc_barrier);
- nfs_set_open_stateid(state, &calldata->res.stateid, 0);
+ nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- nfs4_close_clear_stateid_flags(state,
- calldata->arg.fmode);
- break;
+ goto out_release;
+ case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -2133,9 +2569,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
rpc_restart_call_prepare(task);
+ goto out_release;
+ }
}
+ nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
+out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
@@ -2150,7 +2590,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
dprintk("%s: begin!\n", __func__);
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
- return;
+ goto out_wait;
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2168,20 +2608,22 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
calldata->arg.fmode &= ~FMODE_WRITE;
}
}
+ if (!nfs4_valid_open_stateid(state))
+ call_close = 0;
spin_unlock(&state->owner->so_lock);
if (!call_close) {
/* Note: exit _without_ calling nfs4_close_done */
- task->tk_action = NULL;
- nfs4_sequence_done(task, &calldata->res.seq_res);
- goto out;
+ goto out_no_action;
}
if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
if (calldata->roc &&
- pnfs_roc_drain(inode, &calldata->roc_barrier, task))
- goto out;
+ pnfs_roc_drain(inode, &calldata->roc_barrier, task)) {
+ nfs_release_seqid(calldata->arg.seqid);
+ goto out_wait;
+ }
}
nfs_fattr_init(calldata->res.fattr);
@@ -2191,8 +2633,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
&calldata->res.seq_res,
task) != 0)
nfs_release_seqid(calldata->arg.seqid);
-out:
dprintk("%s: done!\n", __func__);
+ return;
+out_no_action:
+ task->tk_action = NULL;
+out_wait:
+ nfs4_sequence_done(task, &calldata->res.seq_res);
}
static const struct rpc_call_ops nfs4_close_ops = {
@@ -2231,10 +2677,13 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
};
int status = -ENOMEM;
+ nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
+ &task_setup_data.rpc_client, &msg);
+
calldata = kzalloc(sizeof(*calldata), gfp_mask);
if (calldata == NULL)
goto out;
- nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
+ nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
calldata->inode = state->inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
@@ -2271,17 +2720,22 @@ out:
}
static struct inode *
-nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
+nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
+ int open_flags, struct iattr *attr, int *opened)
{
struct nfs4_state *state;
+ struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
+
+ label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
/* Protect against concurrent sillydeletes */
- state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr,
- ctx->cred, &ctx->mdsthreshold);
+ state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened);
+
+ nfs4_label_release_security(label);
+
if (IS_ERR(state))
return ERR_CAST(state);
- ctx->state = state;
- return igrab(state->inode);
+ return state->inode;
}
static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2294,6 +2748,10 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
nfs4_close_state(ctx->state, ctx->mode);
}
+#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
+#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
+
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
struct nfs4_server_caps_arg args = {
@@ -2309,13 +2767,27 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
+ /* Sanity check the server answers */
+ switch (server->nfs_client->cl_minorversion) {
+ case 0:
+ res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK;
+ res.attr_bitmask[2] = 0;
+ break;
+ case 1:
+ res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
+ break;
+ case 2:
+ res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
+ }
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
- NFS_CAP_CTIME|NFS_CAP_MTIME);
- if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
+ NFS_CAP_CTIME|NFS_CAP_MTIME|
+ NFS_CAP_SECURITY_LABEL);
+ if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
+ res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
server->caps |= NFS_CAP_ACLS;
if (res.has_links != 0)
server->caps |= NFS_CAP_HARDLINKS;
@@ -2337,10 +2809,18 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_CTIME;
if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
server->caps |= NFS_CAP_MTIME;
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+ if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
+ server->caps |= NFS_CAP_SECURITY_LABEL;
+#endif
+ memcpy(server->attr_bitmask_nl, res.attr_bitmask,
+ sizeof(server->attr_bitmask));
+ server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+ server->cache_consistency_bitmask[2] = 0;
server->acl_bitmask = res.acl_bitmask;
server->fh_expire_type = res.fh_expire_type;
}
@@ -2363,8 +2843,9 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
+ u32 bitmask[3];
struct nfs4_lookup_root_arg args = {
- .bitmask = nfs4_fattr_bitmap,
+ .bitmask = bitmask,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -2377,6 +2858,13 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = &res,
};
+ bitmask[0] = nfs4_fattr_bitmap[0];
+ bitmask[1] = nfs4_fattr_bitmap[1];
+ /*
+ * Process the label in the upcoming getfattr
+ */
+ bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL;
+
nfs_fattr_init(info->fattr);
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
@@ -2388,6 +2876,7 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
int err;
do {
err = _nfs4_lookup_root(server, fhandle, info);
+ trace_nfs4_lookup_root(server, fhandle, info->fattr, err);
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
@@ -2403,12 +2892,15 @@ out:
static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info, rpc_authflavor_t flavor)
{
+ struct rpc_auth_create_args auth_args = {
+ .pseudoflavor = flavor,
+ };
struct rpc_auth *auth;
int ret;
- auth = rpcauth_create(flavor, server->client);
+ auth = rpcauth_create(&auth_args, server->client);
if (IS_ERR(auth)) {
- ret = -EIO;
+ ret = -EACCES;
goto out;
}
ret = nfs4_lookup_root(server, fhandle, info);
@@ -2416,27 +2908,49 @@ out:
return ret;
}
+/*
+ * Retry pseudoroot lookup with various security flavors. We do this when:
+ *
+ * NFSv4.0: the PUTROOTFH operation returns NFS4ERR_WRONGSEC
+ * NFSv4.1: the server does not support the SECINFO_NO_NAME operation
+ *
+ * Returns zero on success, or a negative NFS4ERR value, or a
+ * negative errno value.
+ */
static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
- int i, len, status = 0;
- rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
-
- len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array));
- if (len < 0)
- return len;
-
- for (i = 0; i < len; i++) {
- /* AUTH_UNIX is the default flavor if none was specified,
- * thus has already been tried. */
- if (flav_array[i] == RPC_AUTH_UNIX)
- continue;
-
- status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
- if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
- continue;
- break;
+ /* Per 3530bis 15.33.5 */
+ static const rpc_authflavor_t flav_array[] = {
+ RPC_AUTH_GSS_KRB5P,
+ RPC_AUTH_GSS_KRB5I,
+ RPC_AUTH_GSS_KRB5,
+ RPC_AUTH_UNIX, /* courtesy */
+ RPC_AUTH_NULL,
+ };
+ int status = -EPERM;
+ size_t i;
+
+ if (server->auth_info.flavor_len > 0) {
+ /* try each flavor specified by user */
+ for (i = 0; i < server->auth_info.flavor_len; i++) {
+ status = nfs4_lookup_root_sec(server, fhandle, info,
+ server->auth_info.flavors[i]);
+ if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+ continue;
+ break;
+ }
+ } else {
+ /* no flavors specified by user, try default list */
+ for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
+ status = nfs4_lookup_root_sec(server, fhandle, info,
+ flav_array[i]);
+ if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
+ continue;
+ break;
+ }
}
+
/*
* -EACCESS could mean that the user doesn't have correct permissions
* to access the mount. It could also mean that we tried to mount
@@ -2449,24 +2963,42 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-/*
- * get the file handle for the "/" directory on the server
+static int nfs4_do_find_root_sec(struct nfs_server *server,
+ struct nfs_fh *fhandle, struct nfs_fsinfo *info)
+{
+ int mv = server->nfs_client->cl_minorversion;
+ return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info);
+}
+
+/**
+ * nfs4_proc_get_rootfh - get file handle for server's pseudoroot
+ * @server: initialized nfs_server handle
+ * @fhandle: we fill in the pseudo-fs root file handle
+ * @info: we fill in an FSINFO struct
+ * @auth_probe: probe the auth flavours
+ *
+ * Returns zero on success, or a negative errno.
*/
int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info)
+ struct nfs_fsinfo *info,
+ bool auth_probe)
{
- int minor_version = server->nfs_client->cl_minorversion;
- int status = nfs4_lookup_root(server, fhandle, info);
- if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
- /*
- * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
- * by nfs4_map_errors() as this function exits.
- */
- status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
+ int status;
+
+ switch (auth_probe) {
+ case false:
+ status = nfs4_lookup_root(server, fhandle, info);
+ if (status != -NFS4ERR_WRONGSEC)
+ break;
+ default:
+ status = nfs4_do_find_root_sec(server, fhandle, info);
+ }
+
if (status == 0)
status = nfs4_server_capabilities(server, fhandle);
if (status == 0)
status = nfs4_do_fsinfo(server, fhandle, info);
+
return nfs4_map_errors(status);
}
@@ -2475,6 +3007,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
{
int error;
struct nfs_fattr *fattr = info->fattr;
+ struct nfs4_label *label = NULL;
error = nfs4_server_capabilities(server, mntfh);
if (error < 0) {
@@ -2482,16 +3015,23 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
return error;
}
- error = nfs4_proc_getattr(server, mntfh, fattr);
+ label = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
+ error = nfs4_proc_getattr(server, mntfh, fattr, label);
if (error < 0) {
dprintk("nfs4_get_root: getattr error = %d\n", -error);
- return error;
+ goto err_free_label;
}
if (fattr->valid & NFS_ATTR_FATTR_FSID &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid))
memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+err_free_label:
+ nfs4_label_free(label);
+
return error;
}
@@ -2518,11 +3058,16 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
- /* Make sure server returned a different fsid for the referral */
+
+ /*
+ * If the fsid didn't change, this is a migration event, not a
+ * referral. Cause us to drop into the exception handler, which
+ * will kick off migration recovery.
+ */
if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
dprintk("%s: server did not return a different fsid for"
" a referral at %s\n", __func__, name->name);
- status = -EIO;
+ status = -NFS4ERR_MOVED;
goto out;
}
/* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
@@ -2538,7 +3083,8 @@ out:
return status;
}
-static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_getattr_arg args = {
.fh = fhandle,
@@ -2546,6 +3092,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
};
struct nfs4_getattr_res res = {
.fattr = fattr,
+ .label = label,
.server = server,
};
struct rpc_message msg = {
@@ -2553,18 +3100,22 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_argp = &args,
.rpc_resp = &res,
};
-
+
+ args.bitmask = nfs4_bitmask(server, label);
+
nfs_fattr_init(fattr);
return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
}
-static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_proc_getattr(server, fhandle, fattr),
+ err = _nfs4_proc_getattr(server, fhandle, fattr, label);
+ trace_nfs4_getattr(server, fhandle, fattr, err);
+ err = nfs4_handle_exception(server, err,
&exception);
} while (exception.retry);
return err;
@@ -2594,19 +3145,20 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct inode *inode = dentry->d_inode;
struct rpc_cred *cred = NULL;
struct nfs4_state *state = NULL;
+ struct nfs4_label *label = NULL;
int status;
if (pnfs_ld_layoutret_on_setattr(inode))
- pnfs_return_layout(inode);
+ pnfs_commit_and_return_layout(inode);
nfs_fattr_init(fattr);
/* Deal with open(O_TRUNC) */
if (sattr->ia_valid & ATTR_OPEN)
- sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+ sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME);
/* Optimization: if the end result is no change, don't RPC */
- if ((sattr->ia_valid & ~(ATTR_FILE)) == 0)
+ if ((sattr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
/* Search for an existing open(O_WRITE) file */
@@ -2620,15 +3172,22 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
}
- status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
- if (status == 0)
+ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
+ status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
+ if (status == 0) {
nfs_setattr_update_inode(inode, sattr);
+ nfs_setsecurity(inode, fattr, label);
+ }
+ nfs4_label_free(label);
return status;
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
const struct qstr *name, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs_server *server = NFS_SERVER(dir);
int status;
@@ -2640,6 +3199,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
struct nfs4_lookup_res res = {
.server = server,
.fattr = fattr,
+ .label = label,
.fh = fhandle,
};
struct rpc_message msg = {
@@ -2648,6 +3208,8 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
.rpc_resp = &res,
};
+ args.bitmask = nfs4_bitmask(server, label);
+
nfs_fattr_init(fattr);
dprintk("NFS call lookup %s\n", name->name);
@@ -2666,13 +3228,14 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
struct qstr *name, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = { };
struct rpc_clnt *client = *clnt;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr);
+ err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label);
+ trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
err = -ENOENT;
@@ -2684,8 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
err = -EPERM;
if (client != *clnt)
goto out;
-
- client = nfs4_create_sec_client(client, dir, name);
+ client = nfs4_negotiate_security(client, dir, name);
if (IS_ERR(client))
return PTR_ERR(client);
@@ -2706,12 +3268,13 @@ out:
}
static int nfs4_proc_lookup(struct inode *dir, struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -2723,15 +3286,13 @@ struct rpc_clnt *
nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
+ struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
- if (status < 0) {
- rpc_shutdown_client(client);
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
+ if (status < 0)
return ERR_PTR(status);
- }
- return client;
+ return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
}
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
@@ -2751,7 +3312,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_cred = entry->cred,
};
int mode = entry->mask;
- int status;
+ int status = 0;
/*
* Determine which access bits we want to ask for...
@@ -2788,8 +3349,9 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(inode),
- _nfs4_proc_access(inode, entry),
+ err = _nfs4_proc_access(inode, entry);
+ trace_nfs4_access(inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
return err;
@@ -2842,8 +3404,9 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(inode),
- _nfs4_proc_readlink(inode, page, pgbase, pglen),
+ err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
+ trace_nfs4_readlink(inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
return err;
@@ -2856,27 +3419,26 @@ static int
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
+ struct nfs4_label l, *ilabel = NULL;
struct nfs_open_context *ctx;
struct nfs4_state *state;
+ int opened = 0;
int status = 0;
ctx = alloc_nfs_open_context(dentry, FMODE_READ);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ ilabel = nfs4_label_init_security(dir, dentry, sattr, &l);
+
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, dentry, ctx->mode,
- flags, sattr, ctx->cred,
- &ctx->mdsthreshold);
- d_drop(dentry);
+ state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened);
if (IS_ERR(state)) {
status = PTR_ERR(state);
goto out;
}
- d_add(dentry, igrab(state->inode));
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- ctx->state = state;
out:
+ nfs4_label_release_security(ilabel);
put_nfs_open_context(ctx);
return status;
}
@@ -2909,8 +3471,9 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_remove(dir, name),
+ err = _nfs4_proc_remove(dir, name);
+ trace_nfs4_remove(dir, name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
return err;
@@ -2924,7 +3487,9 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
res->server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
- nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
+ nfs4_init_sequence(&args->seq_args, &res->seq_res, 1);
+
+ nfs_fattr_init(res->dir_attr);
}
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -2937,7 +3502,8 @@ static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlin
static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
{
- struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ struct nfs_unlinkdata *data = task->tk_calldata;
+ struct nfs_removeres *res = &data->res;
if (!nfs4_sequence_done(task, &res->seq_res))
return 0;
@@ -2955,7 +3521,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
res->server = server;
- nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
+ nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1);
}
static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
@@ -2969,7 +3535,8 @@ static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renam
static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
struct inode *new_dir)
{
- struct nfs_renameres *res = task->tk_msg.rpc_resp;
+ struct nfs_renamedata *data = task->tk_calldata;
+ struct nfs_renameres *res = &data->res;
if (!nfs4_sequence_done(task, &res->seq_res))
return 0;
@@ -2981,48 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 1;
}
-static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_server *server = NFS_SERVER(old_dir);
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .new_dir = NFS_FH(new_dir),
- .old_name = old_name,
- .new_name = new_name,
- };
- struct nfs_renameres res = {
- .server = server,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
- .rpc_argp = &arg,
- .rpc_resp = &res,
- };
- int status = -ENOMEM;
-
- status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
- if (!status) {
- update_changeattr(old_dir, &res.old_cinfo);
- update_changeattr(new_dir, &res.new_cinfo);
- }
- return status;
-}
-
-static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs4_exception exception = { };
- int err;
- do {
- err = nfs4_handle_exception(NFS_SERVER(old_dir),
- _nfs4_proc_rename(old_dir, old_name,
- new_dir, new_name),
- &exception);
- } while (exception.retry);
- return err;
-}
-
static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_server *server = NFS_SERVER(inode);
@@ -3034,6 +3559,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
};
struct nfs4_link_res res = {
.server = server,
+ .label = NULL,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
@@ -3046,11 +3572,24 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
if (res.fattr == NULL)
goto out;
+ res.label = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(res.label)) {
+ status = PTR_ERR(res.label);
+ goto out;
+ }
+ arg.bitmask = nfs4_bitmask(server, res.label);
+
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
update_changeattr(dir, &res.cinfo);
- nfs_post_op_update_inode(inode, res.fattr);
+ status = nfs_post_op_update_inode(inode, res.fattr);
+ if (!status)
+ nfs_setsecurity(inode, res.fattr, res.label);
}
+
+
+ nfs4_label_free(res.label);
+
out:
nfs_free_fattr(res.fattr);
return status;
@@ -3074,6 +3613,7 @@ struct nfs4_createdata {
struct nfs4_create_res res;
struct nfs_fh fh;
struct nfs_fattr fattr;
+ struct nfs4_label *label;
};
static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -3085,6 +3625,10 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
if (data != NULL) {
struct nfs_server *server = NFS_SERVER(dir);
+ data->label = nfs4_label_alloc(server, GFP_KERNEL);
+ if (IS_ERR(data->label))
+ goto out_free;
+
data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE];
data->msg.rpc_argp = &data->arg;
data->msg.rpc_resp = &data->res;
@@ -3093,13 +3637,17 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
data->arg.name = name;
data->arg.attrs = sattr;
data->arg.ftype = ftype;
- data->arg.bitmask = server->attr_bitmask;
+ data->arg.bitmask = nfs4_bitmask(server, data->label);
data->res.server = server;
data->res.fh = &data->fh;
data->res.fattr = &data->fattr;
+ data->res.label = data->label;
nfs_fattr_init(data->res.fattr);
}
return data;
+out_free:
+ kfree(data);
+ return NULL;
}
static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
@@ -3108,18 +3656,20 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
update_changeattr(dir, &data->res.dir_cinfo);
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
return status;
}
static void nfs4_free_createdata(struct nfs4_createdata *data)
{
+ nfs4_label_free(data->label);
kfree(data);
}
static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
- struct page *page, unsigned int len, struct iattr *sattr)
+ struct page *page, unsigned int len, struct iattr *sattr,
+ struct nfs4_label *label)
{
struct nfs4_createdata *data;
int status = -ENAMETOOLONG;
@@ -3135,6 +3685,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK];
data->arg.u.symlink.pages = &page;
data->arg.u.symlink.len = len;
+ data->arg.label = label;
status = nfs4_do_create(dir, dentry, data);
@@ -3147,18 +3698,24 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
struct page *page, unsigned int len, struct iattr *sattr)
{
struct nfs4_exception exception = { };
+ struct nfs4_label l, *label = NULL;
int err;
+
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_symlink(dir, dentry, page,
- len, sattr),
+ err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label);
+ trace_nfs4_symlink(dir, &dentry->d_name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
+
+ nfs4_label_release_security(label);
return err;
}
static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr)
+ struct iattr *sattr, struct nfs4_label *label)
{
struct nfs4_createdata *data;
int status = -ENOMEM;
@@ -3167,6 +3724,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
if (data == NULL)
goto out;
+ data->arg.label = label;
status = nfs4_do_create(dir, dentry, data);
nfs4_free_createdata(data);
@@ -3178,14 +3736,20 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
struct iattr *sattr)
{
struct nfs4_exception exception = { };
+ struct nfs4_label l, *label = NULL;
int err;
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+
sattr->ia_mode &= ~current_umask();
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_mkdir(dir, dentry, sattr),
+ err = _nfs4_proc_mkdir(dir, dentry, sattr, label);
+ trace_nfs4_mkdir(dir, &dentry->d_name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
+ nfs4_label_release_security(label);
+
return err;
}
@@ -3210,9 +3774,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
};
int status;
- dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__,
- dentry->d_parent->d_name.name,
- dentry->d_name.name,
+ dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
+ dentry,
(unsigned long long)cookie);
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
@@ -3234,16 +3797,17 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
- _nfs4_proc_readdir(dentry, cred, cookie,
- pages, count, plus),
+ err = _nfs4_proc_readdir(dentry, cred, cookie,
+ pages, count, plus);
+ trace_nfs4_readdir(dentry->d_inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err,
&exception);
} while (exception.retry);
return err;
}
static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
- struct iattr *sattr, dev_t rdev)
+ struct iattr *sattr, struct nfs4_label *label, dev_t rdev)
{
struct nfs4_createdata *data;
int mode = sattr->ia_mode;
@@ -3268,7 +3832,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
status = -EINVAL;
goto out_free;
}
-
+
+ data->arg.label = label;
status = nfs4_do_create(dir, dentry, data);
out_free:
nfs4_free_createdata(data);
@@ -3280,14 +3845,21 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
struct iattr *sattr, dev_t rdev)
{
struct nfs4_exception exception = { };
+ struct nfs4_label l, *label = NULL;
int err;
+ label = nfs4_label_init_security(dir, dentry, sattr, &l);
+
sattr->ia_mode &= ~current_umask();
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_mknod(dir, dentry, sattr, rdev),
+ err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev);
+ trace_nfs4_mknod(dir, &dentry->d_name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
+
+ nfs4_label_release_security(label);
+
return err;
}
@@ -3345,12 +3917,22 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
{
struct nfs4_exception exception = { };
+ unsigned long now = jiffies;
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_do_fsinfo(server, fhandle, fsinfo),
- &exception);
+ err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
+ trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
+ if (err == 0) {
+ struct nfs_client *clp = server->nfs_client;
+
+ spin_lock(&clp->cl_lock);
+ clp->cl_lease_time = fsinfo->lease_time * HZ;
+ clp->cl_last_renewal = now;
+ spin_unlock(&clp->cl_lock);
+ break;
+ }
+ err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
@@ -3410,15 +3992,57 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+/*
+ * Pick the stateid to use for I/O in mode @fmode on the open context @ctx.
+ *
+ * The lock owner embedded in @l_ctx is forwarded to
+ * nfs4_select_rw_stateid(); when @l_ctx is NULL no lock owner is passed.
+ * The selected stateid is written to @stateid and the helper's return
+ * value is handed back unchanged.
+ */
+int nfs4_set_rw_stateid(nfs4_stateid *stateid,
+		const struct nfs_open_context *ctx,
+		const struct nfs_lock_context *l_ctx,
+		fmode_t fmode)
+{
+	const struct nfs_lockowner *owner = l_ctx ? &l_ctx->lockowner : NULL;
+
+	return nfs4_select_rw_stateid(stateid, ctx->state, fmode, owner);
+}
+EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid);
+
+/*
+ * Check whether @stateid is the stateid that would be selected right now
+ * for I/O in mode @fmode on @ctx / @l_ctx.
+ *
+ * Returns true when the two stateids match, and also when selecting the
+ * current stateid fails with -EIO.
+ */
+static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
+		const struct nfs_open_context *ctx,
+		const struct nfs_lock_context *l_ctx,
+		fmode_t fmode)
+{
+	nfs4_stateid latest;
+	int ret = nfs4_set_rw_stateid(&latest, ctx, l_ctx, fmode);
+
+	/* If the current stateid represents a lost lock, then exit */
+	if (ret == -EIO)
+		return true;
+
+	return nfs4_stateid_match(stateid, &latest);
+}
+
+/*
+ * Classify @err: returns true for the NFSv4 status codes listed below
+ * (revoked, bad, stale, old or expired stateid, or wrong open mode) and
+ * false for every other value.
+ */
+static bool nfs4_error_stateid_expired(int err)
+{
+	switch (err) {
+	case -NFS4ERR_DELEG_REVOKED:
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_BAD_STATEID:
+	case -NFS4ERR_STALE_STATEID:
+	case -NFS4ERR_OLD_STATEID:
+	case -NFS4ERR_OPENMODE:
+	case -NFS4ERR_EXPIRED:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
{
nfs_invalidate_atime(data->header->inode);
}
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
+ trace_nfs4_read(data, task->tk_status);
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -3430,38 +4054,61 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+/*
+ * Decide whether a completed READ must be resent because the stateid it
+ * carried is no longer the current one.
+ *
+ * Returns true, after asking the RPC layer to restart the call from its
+ * prepare step, only when the task status is a stateid-expiry error and
+ * the stateid in @args is not the one that would be selected now.
+ * Otherwise returns false and leaves the task untouched.
+ */
+static bool nfs4_read_stateid_changed(struct rpc_task *task,
+		struct nfs_pgio_args *args)
+{
+	/* Only stateid-expiry errors are candidates for a resend */
+	if (!nfs4_error_stateid_expired(task->tk_status))
+		return false;
+
+	/* The same stateid would be chosen again: do not restart */
+	if (nfs4_stateid_is_current(&args->stateid, args->context,
+				    args->lock_context, FMODE_READ))
+		return false;
+
+	rpc_restart_call_prepare(task);
+	return true;
+}
+
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
-
- return data->read_done_cb ? data->read_done_cb(task, data) :
+ if (nfs4_read_stateid_changed(task, &data->args))
+ return -EAGAIN;
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_read_done_cb(task, data);
}
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
data->timestamp = jiffies;
- data->read_done_cb = nfs4_read_done_cb;
+ data->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
- nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
}
-static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
- nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
&data->res.seq_res,
- task);
+ task))
+ return 0;
+ if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
+ data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
+ return -EIO;
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+ return -EIO;
+ return 0;
}
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
+ trace_nfs4_write(data, task->tk_status);
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -3473,16 +4120,32 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
return 0;
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+/*
+ * Decide whether a completed WRITE must be resent because the stateid it
+ * carried is no longer the current one.
+ *
+ * Returns true, after asking the RPC layer to restart the call from its
+ * prepare step, only when the task status is a stateid-expiry error and
+ * the stateid in @args is not the one that would be selected now.
+ * Otherwise returns false and leaves the task untouched.
+ */
+static bool nfs4_write_stateid_changed(struct rpc_task *task,
+		struct nfs_pgio_args *args)
+{
+	/* Only stateid-expiry errors are candidates for a resend */
+	if (!nfs4_error_stateid_expired(task->tk_status))
+		return false;
+
+	/* The same stateid would be chosen again: do not restart */
+	if (nfs4_stateid_is_current(&args->stateid, args->context,
+				    args->lock_context, FMODE_WRITE))
+		return false;
+
+	rpc_restart_call_prepare(task);
+	return true;
+}
+
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
- return data->write_done_cb ? data->write_done_cb(task, data) :
+ if (nfs4_write_stateid_changed(task, &data->args))
+ return -EAGAIN;
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_write_done_cb(task, data);
}
static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
{
const struct nfs_pgio_header *hdr = data->header;
@@ -3495,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
@@ -3505,21 +4168,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
} else
data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_write_done_cb;
+ if (!data->pgio_done_cb)
+ data->pgio_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
- nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
-}
-
-static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- nfs4_setup_sequence(NFS_SERVER(data->header->inode),
- &data->args.seq_args,
- &data->res.seq_res,
- task);
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -3534,6 +4189,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da
{
struct inode *inode = data->inode;
+ trace_nfs4_commit(data, task->tk_status);
if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -3556,7 +4212,7 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->commit_done_cb = nfs4_commit_done_cb;
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
- nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
struct nfs4_renewdata {
@@ -3585,7 +4241,14 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
struct nfs_client *clp = data->client;
unsigned long timestamp = data->timestamp;
- if (task->tk_status < 0) {
+ trace_nfs4_renew_async(clp, task->tk_status);
+ switch (task->tk_status) {
+ case 0:
+ break;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ break;
+ default:
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
return;
@@ -3621,7 +4284,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
return -ENOMEM;
data->client = clp;
data->timestamp = jiffies;
- return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+ return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
&nfs4_renew_ops, data);
}
@@ -3635,7 +4298,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
unsigned long now = jiffies;
int status;
- status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (status < 0)
return status;
do_renew_lease(clp, now);
@@ -3644,9 +4307,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
static inline int nfs4_server_supports_acls(struct nfs_server *server)
{
- return (server->caps & NFS_CAP_ACLS)
- && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
- && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
+ return server->caps & NFS_CAP_ACLS;
}
/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
@@ -3842,6 +4503,7 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl
ssize_t ret;
do {
ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+ trace_nfs4_get_acl(inode, ret);
if (ret >= 0)
break;
ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
@@ -3921,14 +4583,166 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(inode),
- __nfs4_proc_set_acl(inode, buf, buflen),
+ err = __nfs4_proc_set_acl(inode, buf, buflen);
+ trace_nfs4_set_acl(inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
+ &exception);
+ } while (exception.retry);
+ return err;
+}
+
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+/*
+ * Issue a single GETATTR that requests only the security label attribute
+ * of @inode, decoding the label into the caller's buffer @buf of
+ * @buflen bytes.
+ *
+ * Returns 0 on success, the nfs4_call_sync() status if the call fails,
+ * -ENOENT if the reply did not carry a security label, or -ERANGE if the
+ * reported label length exceeds @buflen.
+ */
+static int _nfs4_get_security_label(struct inode *inode, void *buf,
+					size_t buflen)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_fattr fattr;
+	/* Decode straight into the caller's buffer; pi and lfs start at 0 */
+	struct nfs4_label label = {
+		.len = buflen,
+		.label = buf,
+	};
+	/* Only the security label bit, in attribute word 2, is requested */
+	u32 bitmask[3] = { [2] = FATTR4_WORD2_SECURITY_LABEL };
+	struct nfs4_getattr_arg arg = {
+		.fh		= NFS_FH(inode),
+		.bitmask	= bitmask,
+	};
+	struct nfs4_getattr_res res = {
+		.fattr		= &fattr,
+		.label		= &label,
+		.server		= server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
+	int status;
+
+	nfs_fattr_init(&fattr);
+
+	status = nfs4_call_sync(server->client, server, &msg,
+				&arg.seq_args, &res.seq_res, 0);
+	if (status != 0)
+		return status;
+	if ((fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) == 0)
+		return -ENOENT;
+	if (label.len > buflen)
+		return -ERANGE;
+	return 0;
+}
+
+/*
+ * Fetch the security label of @inode into @buf (@buflen bytes).
+ *
+ * Returns -EOPNOTSUPP when the server does not advertise
+ * NFS_CAP_SECURITY_LABEL. Otherwise each attempt is traced and its status
+ * is passed to nfs4_handle_exception(); the call is repeated for as long
+ * as the handler sets exception.retry, and the handler's final result is
+ * returned.
+ */
+static int nfs4_get_security_label(struct inode *inode, void *buf,
+					size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	struct nfs_server *server;
+	int err;
+
+	if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
+		return -EOPNOTSUPP;
+
+	server = NFS_SERVER(inode);
+	do {
+		int rc = _nfs4_get_security_label(inode, buf, buflen);
+
+		trace_nfs4_get_security_label(inode, rc);
+		err = nfs4_handle_exception(server, rc, &exception);
+	} while (exception.retry);
+
+	return err;
+}
+
+/*
+ * Issue a single SETATTR that changes only the security label of @inode.
+ *
+ * @ilabel is the label to send; @fattr and @olabel receive the decoded
+ * reply. The request uses the zero stateid and an otherwise empty
+ * attribute set. Returns the nfs4_call_sync() status, logging it via
+ * dprintk() when it is non-zero.
+ */
+static int _nfs4_do_set_security_label(struct inode *inode,
+		struct nfs4_label *ilabel,
+		struct nfs_fattr *fattr,
+		struct nfs4_label *olabel)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	/* No ordinary attributes are being changed */
+	struct iattr sattr = {0};
+	/* Only the security label bit, in attribute word 2, is sent */
+	const u32 bitmask[3] = { [2] = FATTR4_WORD2_SECURITY_LABEL };
+	struct nfs_setattrargs arg = {
+		.fh		= NFS_FH(inode),
+		.iap		= &sattr,
+		.server		= server,
+		.bitmask	= bitmask,
+		.label		= ilabel,
+	};
+	struct nfs_setattrres res = {
+		.fattr		= fattr,
+		.label		= olabel,
+		.server		= server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
+	int ret;
+
+	nfs4_stateid_copy(&arg.stateid, &zero_stateid);
+
+	ret = nfs4_call_sync(server->client, server, &msg,
+			     &arg.seq_args, &res.seq_res, 1);
+	if (ret != 0)
+		dprintk("%s failed: %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int nfs4_do_set_security_label(struct inode *inode,
+ struct nfs4_label *ilabel,
+ struct nfs_fattr *fattr,
+ struct nfs4_label *olabel)
+{
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs4_do_set_security_label(inode, ilabel,
+ fattr, olabel);
+ trace_nfs4_set_security_label(inode, err);
+ err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
return err;
}
+/*
+ * Set the security label of @dentry's inode to the @buflen bytes at @buf.
+ *
+ * Returns -EOPNOTSUPP when the server does not advertise
+ * NFS_CAP_SECURITY_LABEL, the error from rpc_lookup_cred() or
+ * nfs4_label_alloc() if either fails, and otherwise the status of
+ * nfs4_do_set_security_label(). On success the label returned by the
+ * server is applied to the inode with nfs_setsecurity().
+ */
static int
+nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen)
+{
+	struct nfs4_label ilabel, *olabel = NULL;
+	struct nfs_fattr fattr;
+	struct rpc_cred *cred;
+	struct inode *inode = dentry->d_inode;
+	int status;
+
+	if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
+		return -EOPNOTSUPP;
+
+	nfs_fattr_init(&fattr);
+
+	/* Wrap the caller's buffer; it is only read when encoding the call */
+	ilabel.pi = 0;
+	ilabel.lfs = 0;
+	ilabel.label = (char *)buf;
+	ilabel.len = buflen;
+
+	cred = rpc_lookup_cred();
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+
+	olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+	if (IS_ERR(olabel)) {
+		/*
+		 * PTR_ERR() already yields a negative errno. Negating it
+		 * would hand callers a positive value that they would not
+		 * recognise as a failure.
+		 */
+		status = PTR_ERR(olabel);
+		goto out;
+	}
+
+	status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel);
+	if (status == 0)
+		nfs_setsecurity(inode, &fattr, olabel);
+
+	nfs4_label_free(olabel);
+out:
+	put_rpccred(cred);
+	return status;
+}
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+
+
+static int
nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
{
struct nfs_client *clp = server->nfs_client;
@@ -3945,15 +4759,25 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_schedule_stateid_recovery(server, state);
+ if (nfs4_schedule_stateid_recovery(server, state) < 0)
+ goto recovery_failed;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
- if (state != NULL)
- nfs4_schedule_stateid_recovery(server, state);
+ if (state != NULL) {
+ if (nfs4_schedule_stateid_recovery(server, state) < 0)
+ goto recovery_failed;
+ }
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
+ case -NFS4ERR_MOVED:
+ if (nfs4_schedule_migration_recovery(server) < 0)
+ goto recovery_failed;
+ goto wait_on_recovery;
+ case -NFS4ERR_LEASE_MOVED:
+ nfs4_schedule_lease_moved_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3965,26 +4789,28 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- task->tk_status = 0;
- return -EAGAIN;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
- task->tk_status = 0;
- return -EAGAIN;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
- task->tk_status = 0;
- return -EAGAIN;
+ goto restart_call;
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
+recovery_failed:
+ task->tk_status = -EIO;
+ return 0;
wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ goto recovery_failed;
+restart_call:
task->tk_status = 0;
return -EAGAIN;
}
@@ -3998,11 +4824,11 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
/* An impossible timestamp guarantees this value
* will never match a generated boot time. */
verf[0] = 0;
- verf[1] = (__be32)(NSEC_PER_SEC + 1);
+ verf[1] = cpu_to_be32(NSEC_PER_SEC + 1);
} else {
struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
- verf[0] = (__be32)nn->boot_time.tv_sec;
- verf[1] = (__be32)nn->boot_time.tv_nsec;
+ verf[0] = cpu_to_be32(nn->boot_time.tv_sec);
+ verf[1] = cpu_to_be32(nn->boot_time.tv_nsec);
}
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
@@ -4028,15 +4854,33 @@ static unsigned int
nfs4_init_uniform_client_string(const struct nfs_client *clp,
char *buf, size_t len)
{
- char *nodename = clp->cl_rpcclient->cl_nodename;
+ const char *nodename = clp->cl_rpcclient->cl_nodename;
if (nfs4_client_id_uniquifier[0] != '\0')
- nodename = nfs4_client_id_uniquifier;
+ return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
+ clp->rpc_ops->version,
+ clp->cl_minorversion,
+ nfs4_client_id_uniquifier,
+ nodename);
return scnprintf(buf, len, "Linux NFSv%u.%u %s",
clp->rpc_ops->version, clp->cl_minorversion,
nodename);
}
+/*
+ * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback
+ * services. Advertise one based on the address family of the
+ * clientaddr.
+ *
+ * A ':' in clp->cl_ipaddr selects "tcp6"; anything else selects "tcp".
+ * The netid is written into @buf (bounded by @len) with scnprintf(),
+ * and scnprintf()'s return value is returned.
+ */
+static unsigned int
+nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
+{
+ if (strchr(clp->cl_ipaddr, ':') != NULL)
+ return scnprintf(buf, len, "tcp6");
+ else
+ return scnprintf(buf, len, "tcp");
+}
+
/**
* nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure
@@ -4078,12 +4922,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
setclientid.sc_name,
sizeof(setclientid.sc_name));
/* cb_client4 */
- rcu_read_lock();
- setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
- sizeof(setclientid.sc_netid),
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_NETID));
- rcu_read_unlock();
+ setclientid.sc_netid_len =
+ nfs4_init_callback_netid(clp,
+ setclientid.sc_netid,
+ sizeof(setclientid.sc_netid));
setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
@@ -4092,6 +4934,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
clp->cl_rpcclient->cl_auth->au_ops->au_name,
setclientid.sc_name_len, setclientid.sc_name);
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_setclientid(clp, status);
dprintk("NFS reply setclientid: %d\n", status);
return status;
}
@@ -4108,27 +4951,18 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
struct nfs4_setclientid_res *arg,
struct rpc_cred *cred)
{
- struct nfs_fsinfo fsinfo;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
.rpc_argp = arg,
- .rpc_resp = &fsinfo,
.rpc_cred = cred,
};
- unsigned long now;
int status;
dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
clp->cl_clientid);
- now = jiffies;
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
- if (status == 0) {
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo.lease_time * HZ;
- clp->cl_last_renewal = now;
- spin_unlock(&clp->cl_lock);
- }
+ trace_nfs4_setclientid_confirm(clp, status);
dprintk("NFS reply setclientid_confirm: %d\n", status);
return status;
}
@@ -4150,12 +4984,19 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
+ trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -4171,7 +5012,6 @@ static void nfs4_delegreturn_release(void *calldata)
kfree(calldata);
}
-#if defined(CONFIG_NFS_V4_1)
static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
{
struct nfs4_delegreturndata *d_data;
@@ -4183,12 +5023,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
&d_data->res.seq_res,
task);
}
-#endif /* CONFIG_NFS_V4_1 */
static const struct rpc_call_ops nfs4_delegreturn_ops = {
-#if defined(CONFIG_NFS_V4_1)
.rpc_call_prepare = nfs4_delegreturn_prepare,
-#endif /* CONFIG_NFS_V4_1 */
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
};
@@ -4213,7 +5050,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
data = kzalloc(sizeof(*data), GFP_NOFS);
if (data == NULL)
return -ENOMEM;
- nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
data->args.bitmask = server->cache_consistency_bitmask;
@@ -4253,6 +5090,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
int err;
do {
err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
+ trace_nfs4_delegreturn(inode, err);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
@@ -4273,7 +5111,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- freezable_schedule_timeout_killable(timeout);
+ freezable_schedule_timeout_killable_unsafe(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
@@ -4317,6 +5155,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
+ request->fl_ops = NULL;
out:
return status;
}
@@ -4327,8 +5166,9 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(state->inode),
- _nfs4_proc_getlk(state, cmd, request),
+ err = _nfs4_proc_getlk(state, cmd, request);
+ trace_nfs4_get_lock(request, state, cmd, err);
+ err = nfs4_handle_exception(NFS_SERVER(state->inode), err,
&exception);
} while (exception.retry);
return err;
@@ -4423,12 +5263,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
struct nfs4_unlockdata *calldata = data;
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
- return;
+ goto out_wait;
if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
/* Note: exit _without_ running nfs4_locku_done */
- task->tk_action = NULL;
- nfs4_sequence_done(task, &calldata->res.seq_res);
- return;
+ goto out_no_action;
}
calldata->timestamp = jiffies;
if (nfs4_setup_sequence(calldata->server,
@@ -4436,6 +5274,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
&calldata->res.seq_res,
task) != 0)
nfs_release_seqid(calldata->arg.seqid);
+ return;
+out_no_action:
+ task->tk_action = NULL;
+out_wait:
+ nfs4_sequence_done(task, &calldata->res.seq_res);
}
static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4462,6 +5305,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
.flags = RPC_TASK_ASYNC,
};
+ nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
+ NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg);
+
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
*/
@@ -4473,7 +5319,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
return ERR_PTR(-ENOMEM);
}
- nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
@@ -4482,7 +5328,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs_inode *nfsi = NFS_I(state->inode);
+ struct inode *inode = state->inode;
+ struct nfs4_state_owner *sp = state->owner;
+ struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_seqid *seqid;
struct nfs4_lock_state *lsp;
struct rpc_task *task;
@@ -4492,18 +5340,23 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
status = nfs4_set_lock_state(state, request);
/* Unlock _before_ we do the RPC call */
request->fl_flags |= FL_EXISTS;
+ /* Exclude nfs_delegation_claim_locks() */
+ mutex_lock(&sp->so_delegreturn_mutex);
+ /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
down_read(&nfsi->rwsem);
if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
}
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
if (status != 0)
goto out;
/* Is this a delegated lock? */
- if (test_bit(NFS_DELEGATED_STATE, &state->flags))
- goto out;
lsp = request->fl_u.nfs4_fl.owner;
+ if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0)
+ goto out;
seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
status = -ENOMEM;
if (seqid == NULL)
@@ -4516,6 +5369,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
rpc_put_task(task);
out:
request->fl_flags = fl_flags;
+ trace_nfs4_unlock(request, state, F_SETLK, status);
return status;
}
@@ -4576,26 +5430,34 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
dprintk("%s: begin!\n", __func__);
if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
- return;
+ goto out_wait;
/* Do we need to do an open_to_lock_owner? */
if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
goto out_release_lock_seqid;
}
- data->arg.open_stateid = &state->stateid;
+ data->arg.open_stateid = &state->open_stateid;
data->arg.new_lock_owner = 1;
data->res.open_seqid = data->arg.open_seqid;
} else
data->arg.new_lock_owner = 0;
+ if (!nfs4_valid_open_stateid(state)) {
+ data->rpc_status = -EBADF;
+ task->tk_action = NULL;
+ goto out_release_open_seqid;
+ }
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->server,
&data->arg.seq_args,
&data->res.seq_res,
task) == 0)
return;
+out_release_open_seqid:
nfs_release_seqid(data->arg.open_seqid);
out_release_lock_seqid:
nfs_release_seqid(data->arg.lock_seqid);
+out_wait:
+ nfs4_sequence_done(task, &data->res.seq_res);
dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
}
@@ -4693,7 +5555,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
return -ENOMEM;
if (IS_SETLKW(cmd))
data->arg.block = 1;
- nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
@@ -4731,6 +5593,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
+ trace_nfs4_lock_reclaim(request, state, F_SETLK, err);
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -4749,10 +5612,15 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
err = nfs4_set_lock_state(state, request);
if (err != 0)
return err;
+ if (!recover_lost_locks) {
+ set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags);
+ return 0;
+ }
do {
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
+ trace_nfs4_lock_expired(request, state, F_SETLK, err);
switch (err) {
default:
goto out;
@@ -4783,13 +5651,19 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
list_for_each_entry(lsp, &state->lock_states, ls_locks) {
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
- status = nfs41_test_stateid(server, &lsp->ls_stateid);
+ struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
+
+ status = nfs41_test_stateid(server,
+ &lsp->ls_stateid,
+ cred);
+ trace_nfs4_test_lock_stateid(state, lsp, status);
if (status != NFS_OK) {
/* Free the stateid unless the server
* informs us the stateid is unrecognized. */
if (status != -NFS4ERR_BAD_STATEID)
nfs41_free_stateid(server,
- &lsp->ls_stateid);
+ &lsp->ls_stateid,
+ cred);
clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
ret = status;
}
@@ -4813,8 +5687,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
+ struct nfs4_state_owner *sp = state->owner;
struct nfs_inode *nfsi = NFS_I(state->inode);
unsigned char fl_flags = request->fl_flags;
+ unsigned int seq;
int status = -ENOLCK;
if ((fl_flags & FL_POSIX) &&
@@ -4836,9 +5712,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
status = do_vfs_lock(request->fl_file, request);
goto out_unlock;
}
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+ up_read(&nfsi->rwsem);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
if (status != 0)
+ goto out;
+ down_read(&nfsi->rwsem);
+ if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+ status = -NFS4ERR_DELAY;
goto out_unlock;
+ }
/* Note: we always want to sleep here! */
request->fl_flags = fl_flags | FL_SLEEP;
if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4861,6 +5744,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
do {
err = _nfs4_proc_setlk(state, cmd, request);
+ trace_nfs4_set_lock(request, state, cmd, err);
if (err == -NFS4ERR_DENIED)
err = -EAGAIN;
err = nfs4_handle_exception(NFS_SERVER(state->inode),
@@ -4927,69 +5811,54 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
return status;
}
-int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
+int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
int err;
err = nfs4_set_lock_state(state, fl);
if (err != 0)
- goto out;
- do {
- err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
- switch (err) {
- default:
- printk(KERN_ERR "NFS: %s: unhandled error "
- "%d.\n", __func__, err);
- case 0:
- case -ESTALE:
- goto out;
- case -NFS4ERR_EXPIRED:
- nfs4_schedule_stateid_recovery(server, state);
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_STALE_STATEID:
- nfs4_schedule_lease_recovery(server->nfs_client);
- goto out;
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- case -NFS4ERR_DEADSESSION:
- nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
- goto out;
- case -ERESTARTSYS:
- /*
- * The show must go on: exit, but mark the
- * stateid as needing recovery.
- */
- case -NFS4ERR_DELEG_REVOKED:
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OPENMODE:
- nfs4_schedule_stateid_recovery(server, state);
- err = 0;
- goto out;
- case -ENOMEM:
- case -NFS4ERR_DENIED:
- /* kill_proc(fl->fl_pid, SIGLOST, 1); */
- err = 0;
- goto out;
- case -NFS4ERR_DELAY:
- break;
- }
- err = nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
-out:
- return err;
+ return err;
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ return nfs4_handle_delegation_recall_error(server, state, stateid, err);
}
struct nfs_release_lockowner_data {
struct nfs4_lock_state *lsp;
struct nfs_server *server;
struct nfs_release_lockowner_args args;
+ struct nfs_release_lockowner_res res;
+ unsigned long timestamp;
};
+/*
+ * Prepare step for a RELEASE_LOCKOWNER call: calls
+ * nfs40_setup_sequence() for the request and then records the current
+ * jiffies value in data->timestamp.
+ */
+static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs_release_lockowner_data *data = calldata;
+ nfs40_setup_sequence(data->server,
+ &data->args.seq_args, &data->res.seq_res, task);
+ data->timestamp = jiffies;
+}
+
+/*
+ * Completion step for a RELEASE_LOCKOWNER call.
+ *
+ * Calls nfs40_sequence_done(), then renews the lease with the recorded
+ * timestamp when the task succeeded. Only the error codes listed in the
+ * switch are handed to nfs4_async_handle_error(); when that returns
+ * -EAGAIN the task is restarted with rpc_restart_call_prepare(). Any
+ * other status is left untouched.
+ */
+static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs_release_lockowner_data *data = calldata;
+ struct nfs_server *server = data->server;
+
+ nfs40_sequence_done(task, &data->res.seq_res);
+
+ switch (task->tk_status) {
+ case 0:
+ renew_lease(server, data->timestamp);
+ break;
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_LEASE_MOVED:
+ case -NFS4ERR_DELAY:
+ /* No state is passed: the third argument is NULL. */
+ if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+ }
+}
+
static void nfs4_release_lockowner_release(void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
@@ -4998,12 +5867,13 @@ static void nfs4_release_lockowner_release(void *calldata)
}
static const struct rpc_call_ops nfs4_release_lockowner_ops = {
+ .rpc_call_prepare = nfs4_release_lockowner_prepare,
+ .rpc_call_done = nfs4_release_lockowner_done,
.rpc_release = nfs4_release_lockowner_release,
};
-int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
+static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- struct nfs_server *server = lsp->ls_state->owner->so_server;
struct nfs_release_lockowner_data *data;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
@@ -5011,6 +5881,7 @@ int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
if (server->nfs_client->cl_mvops->minor_version != 0)
return -EINVAL;
+
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
return -ENOMEM;
@@ -5019,7 +5890,10 @@ int nfs4_release_lockowner(struct nfs4_lock_state *lsp)
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
data->args.lock_owner.id = lsp->ls_seqid.owner_id;
data->args.lock_owner.s_dev = server->s_dev;
+
msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
return 0;
}
@@ -5059,6 +5933,53 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
return len;
}
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+/*
+ * Returns non-zero when NFS_CAP_SECURITY_LABEL is set in server->caps,
+ * zero otherwise.
+ */
+static inline int nfs4_server_supports_labels(struct nfs_server *server)
+{
+ return server->caps & NFS_CAP_SECURITY_LABEL;
+}
+
+/*
+ * xattr "set" callback for security labels.
+ *
+ * When security_ismaclabel() accepts @key, the value in @buf/@buflen is
+ * forwarded to nfs4_set_security_label() and its result is returned.
+ * Any other key yields -EOPNOTSUPP. @flags and @type are not used.
+ */
+static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key,
+ const void *buf, size_t buflen,
+ int flags, int type)
+{
+ if (security_ismaclabel(key))
+ return nfs4_set_security_label(dentry, buf, buflen);
+
+ return -EOPNOTSUPP;
+}
+
+/*
+ * xattr "get" callback for security labels.
+ *
+ * When security_ismaclabel() accepts @key, returns the result of
+ * nfs4_get_security_label() for the dentry's inode, called with
+ * @buf/@buflen. Any other key yields -EOPNOTSUPP. @type is not used.
+ */
+static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key,
+ void *buf, size_t buflen, int type)
+{
+ if (security_ismaclabel(key))
+ return nfs4_get_security_label(dentry->d_inode, buf, buflen);
+ return -EOPNOTSUPP;
+}
+
+/*
+ * xattr "list" callback for security labels.
+ *
+ * Returns 0 when the server lacks NFS_CAP_SECURITY_LABEL. Otherwise
+ * returns the length reported by security_inode_listsecurity(), and
+ * fills @list only when it is non-NULL and @list_len is large enough.
+ * @name, @name_len and @type are not used.
+ *
+ * NOTE(review): the security_inode_listsecurity() result is stored in a
+ * size_t; confirm it cannot be a negative error value on this path.
+ */
+static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list,
+ size_t list_len, const char *name,
+ size_t name_len, int type)
+{
+ size_t len = 0;
+
+ if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) {
+ /* First call passes a NULL buffer and only obtains the length. */
+ len = security_inode_listsecurity(dentry->d_inode, NULL, 0);
+ if (list && len <= list_len)
+ security_inode_listsecurity(dentry->d_inode, list, len);
+ }
+ return len;
+}
+
+/*
+ * xattr handler for names under XATTR_SECURITY_PREFIX, wired to the
+ * security label list/get/set callbacks.
+ */
+static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .list = nfs4_xattr_list_nfs4_label,
+ .get = nfs4_xattr_get_nfs4_label,
+ .set = nfs4_xattr_set_nfs4_label,
+};
+#endif
+
+
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
*/
@@ -5082,7 +6003,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
- u32 bitmask[2] = {
+ u32 bitmask[3] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
@@ -5126,14 +6047,300 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_fs_locations(client, dir, name, fs_locations, page),
+ err = _nfs4_proc_fs_locations(client, dir, name,
+ fs_locations, page);
+ trace_nfs4_get_fs_locations(dir, name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
return err;
}
-static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery. The server can stop returning
+ * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is
+ * appended to this compound to identify the client ID which is
+ * performing recovery.
+ *
+ * Returns 0 after renewing the lease when the call succeeds, otherwise
+ * the non-zero status from nfs4_call_sync_sequence().
+ */
+static int _nfs40_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+ .clientid = server->nfs_client->cl_clientid,
+ .fh = NFS_FH(inode),
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+ .renew = 1, /* append RENEW */
+ };
+ struct nfs4_fs_locations_res res = {
+ .fs_locations = locations,
+ .migration = 1,
+ .renew = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ /* Sampled before the call; used as the renewal time on success. */
+ unsigned long now = jiffies;
+ int status;
+
+ /* Reset the caller's result structure before issuing the call. */
+ nfs_fattr_init(&locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ if (status)
+ return status;
+
+ renew_lease(server, now);
+ return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing migration recovery. The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client. The client ID
+ * performing this operation is identified in the SEQUENCE
+ * operation in this compound.
+ *
+ * When the client supports GETATTR(fs_locations_info), it can
+ * be plumbed in here.
+ *
+ * Returns the status of nfs4_call_sync_sequence(), except that a
+ * successful reply with SEQ4_STATUS_LEASE_MOVED set in the sequence
+ * status flags is reported as -NFS4ERR_LEASE_MOVED.
+ */
+static int _nfs41_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ u32 bitmask[2] = {
+ [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
+ };
+ struct nfs4_fs_locations_arg args = {
+ .fh = NFS_FH(inode),
+ .page = page,
+ .bitmask = bitmask,
+ .migration = 1, /* skip LOOKUP */
+ };
+ struct nfs4_fs_locations_res res = {
+ .fs_locations = locations,
+ .migration = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ int status;
+
+ /* Reset the caller's result structure before issuing the call. */
+ nfs_fattr_init(&locations->fattr);
+ locations->server = server;
+ locations->nlocations = 0;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ if (status == NFS4_OK &&
+ res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+ status = -NFS4ERR_LEASE_MOVED;
+ return status;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_get_locations - discover locations for a migrated FSID
+ * @inode: inode on FSID that is migrating
+ * @locations: result of query
+ * @page: buffer
+ * @cred: credential to use for this operation
+ *
+ * Returns NFS4_OK on success, a negative NFS4ERR status code if the
+ * operation failed, or a negative errno if a local error occurred.
+ *
+ * On success, "locations" is filled in, but if the server has
+ * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not
+ * asserted.
+ *
+ * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
+ * from this client that require migration recovery.
+ *
+ * The query is dispatched through the client's minor-version
+ * mig_recovery_ops->get_locations method.
+ */
+int nfs4_proc_get_locations(struct inode *inode,
+ struct nfs4_fs_locations *locations,
+ struct page *page, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_mig_recovery_ops *ops =
+ clp->cl_mvops->mig_recovery_ops;
+ struct nfs4_exception exception = { };
+ int status;
+
+ dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+ nfs_display_fhandle(NFS_FH(inode), __func__);
+
+ /*
+ * Only -NFS4ERR_DELAY is retried. The return value of
+ * nfs4_handle_exception() is ignored; exception.retry alone
+ * decides whether to loop, and the raw status is returned.
+ */
+ do {
+ status = ops->get_locations(inode, locations, page, cred);
+ if (status != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, status, &exception);
+ } while (exception.retry);
+ return status;
+}
+
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery. The server can stop
+ * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation
+ * is appended to this compound to identify the client ID which is
+ * performing recovery.
+ *
+ * Returns -ENOMEM when the result file handle cannot be allocated,
+ * the non-zero status from nfs4_call_sync_sequence() on failure, or
+ * 0 after renewing the lease on success.
+ */
+static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct rpc_clnt *clnt = server->client;
+ struct nfs4_fsid_present_arg args = {
+ .fh = NFS_FH(inode),
+ .clientid = clp->cl_clientid,
+ .renew = 1, /* append RENEW */
+ };
+ struct nfs4_fsid_present_res res = {
+ .renew = 1,
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ /* Sampled before the call; used as the renewal time on success. */
+ unsigned long now = jiffies;
+ int status;
+
+ res.fh = nfs_alloc_fhandle();
+ if (res.fh == NULL)
+ return -ENOMEM;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ /* The result file handle is not used after the call; free it now. */
+ nfs_free_fhandle(res.fh);
+ if (status)
+ return status;
+
+ do_renew_lease(clp, now);
+ return 0;
+}
+
+#ifdef CONFIG_NFS_V4_1
+
+/*
+ * This operation also signals the server that this client is
+ * performing "lease moved" recovery. The server can stop asserting
+ * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing
+ * this operation is identified in the SEQUENCE operation in this
+ * compound.
+ *
+ * Returns -ENOMEM when the result file handle cannot be allocated.
+ * Otherwise returns the status of nfs4_call_sync_sequence(), except
+ * that a successful reply with SEQ4_STATUS_LEASE_MOVED set in the
+ * sequence status flags is reported as -NFS4ERR_LEASE_MOVED.
+ */
+static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct rpc_clnt *clnt = server->client;
+ struct nfs4_fsid_present_arg args = {
+ .fh = NFS_FH(inode),
+ };
+ struct nfs4_fsid_present_res res = {
+ };
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ .rpc_cred = cred,
+ };
+ int status;
+
+ res.fh = nfs_alloc_fhandle();
+ if (res.fh == NULL)
+ return -ENOMEM;
+
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_set_sequence_privileged(&args.seq_args);
+ status = nfs4_call_sync_sequence(clnt, server, &msg,
+ &args.seq_args, &res.seq_res);
+ /* The result file handle is not used after the call; free it now. */
+ nfs_free_fhandle(res.fh);
+ if (status == NFS4_OK &&
+ res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
+ status = -NFS4ERR_LEASE_MOVED;
+ return status;
+}
+
+#endif /* CONFIG_NFS_V4_1 */
+
+/**
+ * nfs4_proc_fsid_present - Is this FSID present or absent on server?
+ * @inode: inode on FSID to check
+ * @cred: credential to use for this operation
+ *
+ * Server indicates whether the FSID is present, moved, or not
+ * recognized. This operation is necessary to clear a LEASE_MOVED
+ * condition for this client ID.
+ *
+ * Returns NFS4_OK if the FSID is present on this server,
+ * -NFS4ERR_MOVED if the FSID is no longer present, a negative
+ * NFS4ERR code if some error occurred on the server, or a
+ * negative errno if a local failure occurred.
+ *
+ * The check is dispatched through the client's minor-version
+ * mig_recovery_ops->fsid_present method.
+ */
+int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
+ const struct nfs4_mig_recovery_ops *ops =
+ clp->cl_mvops->mig_recovery_ops;
+ struct nfs4_exception exception = { };
+ int status;
+
+ dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+ nfs_display_fhandle(NFS_FH(inode), __func__);
+
+ /*
+ * Only -NFS4ERR_DELAY is retried. The return value of
+ * nfs4_handle_exception() is ignored; exception.retry alone
+ * decides whether to loop, and the raw status is returned.
+ */
+ do {
+ status = ops->fsid_present(inode, cred);
+ if (status != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, status, &exception);
+ } while (exception.retry);
+ return status;
+}
+
+/**
+ * If 'use_integrity' is true and the state management nfs_client
+ * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
+ * and the machine credential as per RFC3530bis and RFC5661 Security
+ * Considerations sections. Otherwise, just use the user cred with the
+ * filesystem's rpc_client.
+ */
+static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity)
{
int status;
struct nfs4_secinfo_arg args = {
@@ -5148,10 +6355,27 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
.rpc_argp = &args,
.rpc_resp = &res,
};
+ struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+ struct rpc_cred *cred = NULL;
+
+ if (use_integrity) {
+ clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
+ cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+ msg.rpc_cred = cred;
+ }
dprintk("NFS call secinfo %s\n", name->name);
- status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+
+ nfs4_state_protect(NFS_SERVER(dir)->nfs_client,
+ NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+
+ status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
+ &res.seq_res, 0);
dprintk("NFS reply secinfo: %d\n", status);
+
+ if (cred)
+ put_rpccred(cred);
+
return status;
}
@@ -5161,8 +6385,23 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(NFS_SERVER(dir),
- _nfs4_proc_secinfo(dir, name, flavors),
+ err = -NFS4ERR_WRONGSEC;
+
+ /* try to use integrity protection with machine cred */
+ if (_nfs4_is_integrity_protected(NFS_SERVER(dir)->nfs_client))
+ err = _nfs4_proc_secinfo(dir, name, flavors, true);
+
+ /*
+ * if unable to use integrity protection, or SECINFO with
+ * integrity protection returns NFS4ERR_WRONGSEC (which is
+ * disallowed by spec, but exists in deployed servers) use
+ * the current filesystem's rpc_client and the user cred.
+ */
+ if (err == -NFS4ERR_WRONGSEC)
+ err = _nfs4_proc_secinfo(dir, name, flavors, false);
+
+ trace_nfs4_secinfo(dir, name, err);
+ err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
} while (exception.retry);
return err;
@@ -5226,6 +6465,7 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
}
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_bind_conn_to_session(clp, status);
if (status == 0) {
if (memcmp(res.session->sess_id.data,
clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
@@ -5254,22 +6494,139 @@ out:
}
/*
- * nfs4_proc_exchange_id()
+ * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
+ * and operations we'd like to see to enable certain features in the allow map
+ */
+static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
+ .how = SP4_MACH_CRED,
+ .enforce.u.words = {
+ [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+ 1 << (OP_EXCHANGE_ID - 32) |
+ 1 << (OP_CREATE_SESSION - 32) |
+ 1 << (OP_DESTROY_SESSION - 32) |
+ 1 << (OP_DESTROY_CLIENTID - 32)
+ },
+ .allow.u.words = {
+ [0] = 1 << (OP_CLOSE) |
+ 1 << (OP_LOCKU) |
+ 1 << (OP_COMMIT),
+ [1] = 1 << (OP_SECINFO - 32) |
+ 1 << (OP_SECINFO_NO_NAME - 32) |
+ 1 << (OP_TEST_STATEID - 32) |
+ 1 << (OP_FREE_STATEID - 32) |
+ 1 << (OP_WRITE - 32)
+ }
+};
+
+/*
+ * Select the state protection mode for client `clp' given the server results
+ * from exchange_id in `sp'.
*
- * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ * Returns 0 on success, negative errno otherwise.
+ */
+static int nfs4_sp4_select_mode(struct nfs_client *clp,
+ struct nfs41_state_protection *sp)
+{
+ static const u32 supported_enforce[NFS4_OP_MAP_NUM_WORDS] = {
+ [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+ 1 << (OP_EXCHANGE_ID - 32) |
+ 1 << (OP_CREATE_SESSION - 32) |
+ 1 << (OP_DESTROY_SESSION - 32) |
+ 1 << (OP_DESTROY_CLIENTID - 32)
+ };
+ unsigned int i;
+
+ if (sp->how == SP4_MACH_CRED) {
+ /* Print state protect result */
+ dfprintk(MOUNT, "Server SP4_MACH_CRED support:\n");
+ for (i = 0; i <= LAST_NFS4_OP; i++) {
+ if (test_bit(i, sp->enforce.u.longs))
+ dfprintk(MOUNT, " enforce op %d\n", i);
+ if (test_bit(i, sp->allow.u.longs))
+ dfprintk(MOUNT, " allow op %d\n", i);
+ }
+
+ /* make sure nothing is on enforce list that isn't supported */
+ for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) {
+ if (sp->enforce.u.words[i] & ~supported_enforce[i]) {
+ dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Minimal mode - state operations are allowed to use machine
+ * credential. Note this already happens by default, so the
+ * client doesn't have to do anything more than the negotiation.
+ *
+ * NOTE: we don't care if EXCHANGE_ID is in the list -
+ * we're already using the machine cred for exchange_id
+ * and will never use a different cred.
+ */
+ if (test_bit(OP_BIND_CONN_TO_SESSION, sp->enforce.u.longs) &&
+ test_bit(OP_CREATE_SESSION, sp->enforce.u.longs) &&
+ test_bit(OP_DESTROY_SESSION, sp->enforce.u.longs) &&
+ test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) {
+ dfprintk(MOUNT, "sp4_mach_cred:\n");
+ dfprintk(MOUNT, " minimal mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags);
+ } else {
+ dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
+ return -EINVAL;
+ }
+
+ if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
+ test_bit(OP_LOCKU, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " cleanup mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
+ }
+
+ if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
+ test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " secinfo mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags);
+ }
+
+ if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) &&
+ test_bit(OP_FREE_STATEID, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " stateid mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags);
+ }
+
+ if (test_bit(OP_WRITE, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " write mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags);
+ }
+
+ if (test_bit(OP_COMMIT, sp->allow.u.longs)) {
+ dfprintk(MOUNT, " commit mode enabled\n");
+ set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * _nfs4_proc_exchange_id()
*
- * Since the clientid has expired, all compounds using sessions
- * associated with the stale clientid will be returning
- * NFS4ERR_BADSESSION in the sequence operation, and will therefore
- * be in some phase of session reset.
+ * Wrapper for EXCHANGE_ID operation.
*/
-int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+ u32 sp4_how)
{
nfs4_verifier verifier;
struct nfs41_exchange_id_args args = {
.verifier = &verifier,
.client = clp,
- .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER,
+#ifdef CONFIG_NFS_V4_1_MIGRATION
+ .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
+ EXCHGID4_FLAG_BIND_PRINC_STATEID |
+ EXCHGID4_FLAG_SUPP_MOVED_MIGR,
+#else
+ .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER |
+ EXCHGID4_FLAG_BIND_PRINC_STATEID,
+#endif
};
struct nfs41_exchange_id_res res = {
0
@@ -5309,10 +6666,30 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
goto out_server_scope;
}
+ switch (sp4_how) {
+ case SP4_NONE:
+ args.state_protect.how = SP4_NONE;
+ break;
+
+ case SP4_MACH_CRED:
+ args.state_protect = nfs4_sp4_mach_cred_request;
+ break;
+
+ default:
+ /* unsupported! */
+ WARN_ON_ONCE(1);
+ status = -EINVAL;
+ goto out_server_scope;
+ }
+
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_exchange_id(clp, status);
if (status == 0)
status = nfs4_check_cl_exchange_flags(res.flags);
+ if (status == 0)
+ status = nfs4_sp4_select_mode(clp, &res.state_protect);
+
if (status == 0) {
clp->cl_clientid = res.clientid;
clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
@@ -5359,6 +6736,35 @@ out:
return status;
}
+/*
+ * nfs4_proc_exchange_id()
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ *
+ * Since the clientid has expired, all compounds using sessions
+ * associated with the stale clientid will be returning
+ * NFS4ERR_BADSESSION in the sequence operation, and will therefore
+ * be in some phase of session reset.
+ *
+ * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used.
+ */
+int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+{
+ rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor;
+ int status;
+
+ /* try SP4_MACH_CRED if krb5i/p */
+ if (authflavor == RPC_AUTH_GSS_KRB5I ||
+ authflavor == RPC_AUTH_GSS_KRB5P) {
+ status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED);
+ if (!status)
+ return 0;
+ }
+
+ /* try SP4_NONE */
+ return _nfs4_proc_exchange_id(clp, cred, SP4_NONE);
+}
+
static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
struct rpc_cred *cred)
{
@@ -5370,6 +6776,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
int status;
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_destroy_clientid(clp, status);
if (status)
dprintk("NFS: Got error %d from the server %s on "
"DESTROY_CLIENTID.", status, clp->cl_hostname);
@@ -5407,7 +6814,7 @@ int nfs4_destroy_clientid(struct nfs_client *clp)
goto out;
if (clp->cl_preserve_clid)
goto out;
- cred = nfs4_get_exchange_id_cred(clp);
+ cred = nfs4_get_clid_cred(clp);
ret = nfs4_proc_destroy_clientid(clp, cred);
if (cred)
put_rpccred(cred);
@@ -5499,7 +6906,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
};
int status;
- nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
+ nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
nfs4_set_sequence_privileged(&args.la_seq_args);
dprintk("--> %s\n", __func__);
task = rpc_run_task(&task_setup);
@@ -5526,17 +6933,14 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
*/
static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
{
- struct nfs4_session *session = args->client->cl_session;
- unsigned int mxrqst_sz = session->fc_target_max_rqst_sz,
- mxresp_sz = session->fc_target_max_resp_sz;
+ unsigned int max_rqst_sz, max_resp_sz;
+
+ max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead;
+ max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead;
- if (mxrqst_sz == 0)
- mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
- if (mxresp_sz == 0)
- mxresp_sz = NFS_MAX_FILE_IO_SIZE;
/* Fore channel attributes */
- args->fc_attrs.max_rqst_sz = mxrqst_sz;
- args->fc_attrs.max_resp_sz = mxresp_sz;
+ args->fc_attrs.max_rqst_sz = max_rqst_sz;
+ args->fc_attrs.max_resp_sz = max_resp_sz;
args->fc_attrs.max_ops = NFS4_MAX_OPS;
args->fc_attrs.max_reqs = max_session_slots;
@@ -5636,6 +7040,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_create_session(clp, status);
if (!status) {
/* Verify the session's negotiated channel_attrs values */
@@ -5699,6 +7104,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
return status;
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ trace_nfs4_destroy_session(session->clp, status);
if (status)
dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
@@ -5748,6 +7154,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
return;
+ trace_nfs4_sequence(clp, task->tk_status);
if (task->tk_status < 0) {
dprintk("%s ERROR %d\n", __func__, task->tk_status);
if (atomic_read(&clp->cl_count) == 1)
@@ -5795,7 +7202,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
.rpc_client = clp->cl_rpcclient,
.rpc_message = &msg,
.callback_ops = &nfs41_sequence_ops,
- .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
};
if (!atomic_inc_not_zero(&clp->cl_count))
@@ -5805,7 +7212,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
nfs_put_client(clp);
return ERR_PTR(-ENOMEM);
}
- nfs41_init_sequence(&calldata->args, &calldata->res, 0);
+ nfs4_init_sequence(&calldata->args, &calldata->res, 0);
if (is_privileged)
nfs4_set_sequence_privileged(&calldata->args);
msg.rpc_argp = &calldata->args;
@@ -5900,6 +7307,7 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
if (!nfs41_sequence_done(task, res))
return;
+ trace_nfs4_reclaim_complete(clp, task->tk_status);
if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
rpc_restart_call_prepare(task);
return;
@@ -5923,12 +7331,14 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = {
/*
* Issue a global reclaim complete.
*/
-static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
+static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
+ struct rpc_cred *cred)
{
struct nfs4_reclaim_complete_data *calldata;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE],
+ .rpc_cred = cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clp->cl_rpcclient,
@@ -5945,7 +7355,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
calldata->clp = clp;
calldata->arg.one_fs = 0;
- nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
+ nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
nfs4_set_sequence_privileged(&calldata->arg.seq_args);
msg.rpc_argp = &calldata->arg;
msg.rpc_resp = &calldata->res;
@@ -5995,8 +7405,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
+ unsigned long timeo, now, giveup;
- dprintk("--> %s\n", __func__);
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
@@ -6004,9 +7415,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+ /*
+ * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+ * (or clients) writing to the same RAID stripe
+ */
case -NFS4ERR_LAYOUTTRYLATER:
+ /*
+ * NFS4ERR_RECALLCONFLICT is a conflict with self (must recall
+ * existing layout before getting a new one).
+ */
case -NFS4ERR_RECALLCONFLICT:
- task->tk_status = -NFS4ERR_DELAY;
+ timeo = rpc_get_timeout(task->tk_client);
+ giveup = lgp->args.timestamp + timeo;
+ now = jiffies;
+ if (time_after(giveup, now)) {
+ unsigned long delay;
+
+ /* Delay for:
+ * - Not less than NFS4_POLL_RETRY_MIN.
+ * - One last time a jiffy before we give up
+ * - exponential backoff (time_now minus start_attempt)
+ */
+ delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+ min((giveup - now - 1),
+ now - lgp->args.timestamp));
+
+ dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+ __func__, delay);
+ rpc_delay(task, delay);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out; /* Do not call nfs4_async_handle_error() */
+ }
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
@@ -6079,11 +7519,13 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
static void nfs4_layoutget_release(void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
- struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ struct inode *inode = lgp->args.inode;
+ struct nfs_server *server = NFS_SERVER(inode);
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
nfs4_free_pages(lgp->args.layout.pages, max_pages);
+ pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
kfree(calldata);
dprintk("<-- %s\n", __func__);
@@ -6098,13 +7540,15 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
struct pnfs_layout_segment *
nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
{
- struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ struct inode *inode = lgp->args.inode;
+ struct nfs_server *server = NFS_SERVER(inode);
size_t max_pages = max_response_pages(server);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
.rpc_argp = &lgp->args,
.rpc_resp = &lgp->res,
+ .rpc_cred = lgp->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = server->client,
@@ -6124,17 +7568,27 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
return ERR_PTR(-ENOMEM);
}
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+ lgp->args.timestamp = jiffies;
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
- nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
+ nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
+
+ /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+ pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return ERR_CAST(task);
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0)
status = task->tk_status;
- if (status == 0)
+ trace_nfs4_layoutget(lgp->args.ctx,
+ &lgp->args.range,
+ &lgp->res.range,
+ status);
+ /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+ if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
@@ -6166,7 +7620,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
server = NFS_SERVER(lrp->args.inode);
- if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+ switch (task->tk_status) {
+ default:
+ task->tk_status = 0;
+ case 0:
+ break;
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN)
+ break;
rpc_restart_call_prepare(task);
return;
}
@@ -6202,9 +7663,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
.rpc_argp = &lrp->args,
.rpc_resp = &lrp->res,
+ .rpc_cred = lrp->cred,
};
struct rpc_task_setup task_setup_data = {
- .rpc_client = lrp->clp->cl_rpcclient,
+ .rpc_client = NFS_SERVER(lrp->args.inode)->client,
.rpc_message = &msg,
.callback_ops = &nfs4_layoutreturn_call_ops,
.callback_data = lrp,
@@ -6212,11 +7674,12 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
int status;
dprintk("--> %s\n", __func__);
- nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
+ nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
+ trace_nfs4_layoutreturn(lrp->args.inode, status);
dprintk("<-- %s status=%d\n", __func__, status);
rpc_put_task(task);
return status;
@@ -6271,7 +7734,9 @@ int nfs4_proc_getdevicelist(struct nfs_server *server,
EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist);
static int
-_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+_nfs4_proc_getdeviceinfo(struct nfs_server *server,
+ struct pnfs_device *pdev,
+ struct rpc_cred *cred)
{
struct nfs4_getdeviceinfo_args args = {
.pdev = pdev,
@@ -6283,6 +7748,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
.rpc_argp = &args,
.rpc_resp = &res,
+ .rpc_cred = cred,
};
int status;
@@ -6293,14 +7759,16 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
return status;
}
-int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
+int nfs4_proc_getdeviceinfo(struct nfs_server *server,
+ struct pnfs_device *pdev,
+ struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
int err;
do {
err = nfs4_handle_exception(server,
- _nfs4_proc_getdeviceinfo(server, pdev),
+ _nfs4_proc_getdeviceinfo(server, pdev, cred),
&exception);
} while (exception.retry);
return err;
@@ -6334,10 +7802,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_BADLAYOUT: /* no layout */
case -NFS4ERR_GRACE: /* loca_recalim always false */
task->tk_status = 0;
- break;
case 0:
- nfs_post_op_update_inode_force_wcc(data->args.inode,
- data->res.fattr);
break;
default:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
@@ -6350,22 +7815,10 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
- struct pnfs_layout_segment *lseg, *tmp;
- unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
pnfs_cleanup_layoutcommit(data);
- /* Matched by references in pnfs_set_layoutcommit */
- list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
- list_del_init(&lseg->pls_lc_list);
- if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
- &lseg->pls_flags))
- pnfs_put_lseg(lseg);
- }
-
- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
- smp_mb__after_clear_bit();
- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
+ nfs_post_op_update_inode_force_wcc(data->args.inode,
+ data->res.fattr);
put_rpccred(data->cred);
kfree(data);
}
@@ -6402,7 +7855,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
data->args.lastbytewritten,
data->args.inode->i_ino);
- nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -6412,15 +7865,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
if (status != 0)
goto out;
status = task->tk_status;
+ trace_nfs4_layoutcommit(data->args.inode, status);
out:
dprintk("%s: status %d\n", __func__, status);
rpc_put_task(task);
return status;
}
+/**
+ * Use the state management nfs_client cl_rpcclient, which uses krb5i (if
+ * possible) as per RFC3530bis and RFC5661 Security Considerations sections
+ */
static int
_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+ struct nfs_fsinfo *info,
+ struct nfs4_secinfo_flavors *flavors, bool use_integrity)
{
struct nfs41_secinfo_no_name_args args = {
.style = SECINFO_STYLE_CURRENT_FH,
@@ -6433,7 +7892,25 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_argp = &args,
.rpc_resp = &res,
};
- return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+ struct rpc_clnt *clnt = server->client;
+ struct rpc_cred *cred = NULL;
+ int status;
+
+ if (use_integrity) {
+ clnt = server->nfs_client->cl_rpcclient;
+ cred = nfs4_get_clid_cred(server->nfs_client);
+ msg.rpc_cred = cred;
+ }
+
+ dprintk("--> %s\n", __func__);
+ status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
+ dprintk("<-- %s status=%d\n", __func__, status);
+
+ if (cred)
+ put_rpccred(cred);
+
+ return status;
}
static int
@@ -6443,11 +7920,28 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+ /* first try using integrity protection */
+ err = -NFS4ERR_WRONGSEC;
+
+ /* try to use integrity protection with machine cred */
+ if (_nfs4_is_integrity_protected(server->nfs_client))
+ err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ flavors, true);
+
+ /*
+ * if unable to use integrity protection, or SECINFO with
+ * integrity protection returns NFS4ERR_WRONGSEC (which is
+ * disallowed by spec, but exists in deployed servers) use
+ * the current filesystem's rpc_client and the user cred.
+ */
+ if (err == -NFS4ERR_WRONGSEC)
+ err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+ flavors, false);
+
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
@@ -6463,8 +7957,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
{
int err;
struct page *page;
- rpc_authflavor_t flavor;
+ rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
struct nfs4_secinfo_flavors *flavors;
+ struct nfs4_secinfo4 *secinfo;
+ int i;
page = alloc_page(GFP_KERNEL);
if (!page) {
@@ -6479,16 +7975,41 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
- if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
if (err)
goto out_freepage;
- flavor = nfs_find_best_sec(flavors);
- if (err == 0)
- err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+ for (i = 0; i < flavors->num_flavors; i++) {
+ secinfo = &flavors->flavors[i];
+
+ switch (secinfo->flavor) {
+ case RPC_AUTH_NULL:
+ case RPC_AUTH_UNIX:
+ case RPC_AUTH_GSS:
+ flavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+ &secinfo->flavor_info);
+ break;
+ default:
+ flavor = RPC_AUTH_MAXFLAVOR;
+ break;
+ }
+
+ if (!nfs_auth_info_match(&server->auth_info, flavor))
+ flavor = RPC_AUTH_MAXFLAVOR;
+
+ if (flavor != RPC_AUTH_MAXFLAVOR) {
+ err = nfs4_lookup_root_sec(server, fhandle,
+ info, flavor);
+ if (!err)
+ break;
+ }
+ }
+
+ if (flavor == RPC_AUTH_MAXFLAVOR)
+ err = -EPERM;
out_freepage:
put_page(page);
@@ -6498,7 +8019,9 @@ out:
return err;
}
-static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
+static int _nfs41_test_stateid(struct nfs_server *server,
+ nfs4_stateid *stateid,
+ struct rpc_cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
@@ -6509,12 +8032,17 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
.rpc_argp = &args,
.rpc_resp = &res,
+ .rpc_cred = cred,
};
+ struct rpc_clnt *rpc_client = server->client;
+
+ nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID,
+ &rpc_client, &msg);
dprintk("NFS call test_stateid %p\n", stateid);
- nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(server->client, server, &msg,
+ status = nfs4_call_sync_sequence(rpc_client, server, &msg,
&args.seq_args, &res.seq_res);
if (status != NFS_OK) {
dprintk("NFS reply test_stateid: failed, %d\n", status);
@@ -6529,17 +8057,20 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
*
* @server: server / transport on which to perform the operation
* @stateid: state ID to test
+ * @cred: credential
*
* Returns NFS_OK if the server recognizes that "stateid" is valid.
* Otherwise a negative NFS4ERR value is returned if the operation
* failed or the state ID is not currently valid.
*/
-static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
+static int nfs41_test_stateid(struct nfs_server *server,
+ nfs4_stateid *stateid,
+ struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs41_test_stateid(server, stateid);
+ err = _nfs41_test_stateid(server, stateid, cred);
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -6547,26 +8078,81 @@ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
return err;
}
-static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
-{
- struct nfs41_free_stateid_args args = {
- .stateid = stateid,
- };
+struct nfs_free_stateid_data {
+ struct nfs_server *server;
+ struct nfs41_free_stateid_args args;
struct nfs41_free_stateid_res res;
+};
+
+static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs_free_stateid_data *data = calldata;
+ nfs41_setup_sequence(nfs4_get_session(data->server),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task);
+}
+
+static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs_free_stateid_data *data = calldata;
+
+ nfs41_sequence_done(task, &data->res.seq_res);
+
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN)
+ rpc_restart_call_prepare(task);
+ }
+}
+
+static void nfs41_free_stateid_release(void *calldata)
+{
+ kfree(calldata);
+}
+
+static const struct rpc_call_ops nfs41_free_stateid_ops = {
+ .rpc_call_prepare = nfs41_free_stateid_prepare,
+ .rpc_call_done = nfs41_free_stateid_done,
+ .rpc_release = nfs41_free_stateid_release,
+};
+
+static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server,
+ nfs4_stateid *stateid,
+ struct rpc_cred *cred,
+ bool privileged)
+{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
- .rpc_argp = &args,
- .rpc_resp = &res,
+ .rpc_cred = cred,
};
- int status;
+ struct rpc_task_setup task_setup = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs41_free_stateid_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
+ struct nfs_free_stateid_data *data;
+
+ nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID,
+ &task_setup.rpc_client, &msg);
dprintk("NFS call free_stateid %p\n", stateid);
- nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
- nfs4_set_sequence_privileged(&args.seq_args);
- status = nfs4_call_sync_sequence(server->client, server, &msg,
- &args.seq_args, &res.seq_res);
- dprintk("NFS reply free_stateid: %d\n", status);
- return status;
+ data = kmalloc(sizeof(*data), GFP_NOFS);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+ data->server = server;
+ nfs4_stateid_copy(&data->args.stateid, stateid);
+
+ task_setup.callback_data = data;
+
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ if (privileged)
+ nfs4_set_sequence_privileged(&data->args.seq_args);
+
+ return rpc_run_task(&task_setup);
}
/**
@@ -6574,21 +8160,39 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
*
* @server: server / transport on which to perform the operation
* @stateid: state ID to release
+ * @cred: credential
*
* Returns NFS_OK if the server freed "stateid". Otherwise a
* negative NFS4ERR value is returned.
*/
-static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
+static int nfs41_free_stateid(struct nfs_server *server,
+ nfs4_stateid *stateid,
+ struct rpc_cred *cred)
{
- struct nfs4_exception exception = { };
- int err;
- do {
- err = _nfs4_free_stateid(server, stateid);
- if (err != -NFS4ERR_DELAY)
- break;
- nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
- return err;
+ struct rpc_task *task;
+ int ret;
+
+ task = _nfs41_free_stateid(server, stateid, cred, true);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ ret = rpc_wait_for_completion_task(task);
+ if (!ret)
+ ret = task->tk_status;
+ rpc_put_task(task);
+ return ret;
+}
+
+static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
+{
+ struct rpc_task *task;
+ struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
+
+ task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
+ nfs4_free_lock_state(server, lsp);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
}
static bool nfs41_match_stateid(const nfs4_stateid *s1,
@@ -6620,7 +8224,6 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
.recover_open = nfs4_open_reclaim,
.recover_lock = nfs4_lock_reclaim,
.establish_clid = nfs4_init_clientid,
- .get_clid_cred = nfs4_get_setclientid_cred,
.detect_trunking = nfs40_discover_server_trunking,
};
@@ -6631,7 +8234,6 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
.recover_open = nfs4_open_reclaim,
.recover_lock = nfs4_lock_reclaim,
.establish_clid = nfs41_init_clientid,
- .get_clid_cred = nfs4_get_exchange_id_cred,
.reclaim_complete = nfs41_proc_reclaim_complete,
.detect_trunking = nfs41_discover_server_trunking,
};
@@ -6643,7 +8245,6 @@ static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
.recover_open = nfs4_open_expired,
.recover_lock = nfs4_lock_expired,
.establish_clid = nfs4_init_clientid,
- .get_clid_cred = nfs4_get_setclientid_cred,
};
#if defined(CONFIG_NFS_V4_1)
@@ -6653,7 +8254,6 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
.recover_open = nfs41_open_expired,
.recover_lock = nfs41_lock_expired,
.establish_clid = nfs41_init_clientid,
- .get_clid_cred = nfs4_get_exchange_id_cred,
};
#endif /* CONFIG_NFS_V4_1 */
@@ -6671,22 +8271,73 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
};
#endif
+static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = {
+ .get_locations = _nfs40_proc_get_locations,
+ .fsid_present = _nfs40_proc_fsid_present,
+};
+
+#if defined(CONFIG_NFS_V4_1)
+static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = {
+ .get_locations = _nfs41_proc_get_locations,
+ .fsid_present = _nfs41_proc_fsid_present,
+};
+#endif /* CONFIG_NFS_V4_1 */
+
static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.minor_version = 0,
- .call_sync = _nfs4_call_sync,
+ .init_caps = NFS_CAP_READDIRPLUS
+ | NFS_CAP_ATOMIC_OPEN
+ | NFS_CAP_CHANGE_ATTR
+ | NFS_CAP_POSIX_LOCK,
+ .init_client = nfs40_init_client,
+ .shutdown_client = nfs40_shutdown_client,
.match_stateid = nfs4_match_stateid,
.find_root_sec = nfs4_find_root_sec,
+ .free_lock_state = nfs4_release_lockowner,
+ .call_sync_ops = &nfs40_call_sync_ops,
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
.state_renewal_ops = &nfs40_state_renewal_ops,
+ .mig_recovery_ops = &nfs40_mig_recovery_ops,
};
#if defined(CONFIG_NFS_V4_1)
static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.minor_version = 1,
- .call_sync = nfs4_call_sync_sequence,
+ .init_caps = NFS_CAP_READDIRPLUS
+ | NFS_CAP_ATOMIC_OPEN
+ | NFS_CAP_CHANGE_ATTR
+ | NFS_CAP_POSIX_LOCK
+ | NFS_CAP_STATEID_NFSV41
+ | NFS_CAP_ATOMIC_OPEN_V1,
+ .init_client = nfs41_init_client,
+ .shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
.find_root_sec = nfs41_find_root_sec,
+ .free_lock_state = nfs41_free_lock_state,
+ .call_sync_ops = &nfs41_call_sync_ops,
+ .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
+ .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
+ .state_renewal_ops = &nfs41_state_renewal_ops,
+ .mig_recovery_ops = &nfs41_mig_recovery_ops,
+};
+#endif
+
+#if defined(CONFIG_NFS_V4_2)
+static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
+ .minor_version = 2,
+ .init_caps = NFS_CAP_READDIRPLUS
+ | NFS_CAP_ATOMIC_OPEN
+ | NFS_CAP_CHANGE_ATTR
+ | NFS_CAP_POSIX_LOCK
+ | NFS_CAP_STATEID_NFSV41
+ | NFS_CAP_ATOMIC_OPEN_V1,
+ .init_client = nfs41_init_client,
+ .shutdown_client = nfs41_shutdown_client,
+ .match_stateid = nfs41_match_stateid,
+ .find_root_sec = nfs41_find_root_sec,
+ .free_lock_state = nfs41_free_lock_state,
+ .call_sync_ops = &nfs41_call_sync_ops,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
@@ -6698,9 +8349,12 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#if defined(CONFIG_NFS_V4_1)
[1] = &nfs_v4_1_minor_ops,
#endif
+#if defined(CONFIG_NFS_V4_2)
+ [2] = &nfs_v4_2_minor_ops,
+#endif
};
-const struct inode_operations nfs4_dir_inode_operations = {
+static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
.atomic_open = nfs_atomic_open,
@@ -6749,7 +8403,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.unlink_setup = nfs4_proc_unlink_setup,
.unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
.unlink_done = nfs4_proc_unlink_done,
- .rename = nfs4_proc_rename,
.rename_setup = nfs4_proc_rename_setup,
.rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
.rename_done = nfs4_proc_rename_done,
@@ -6764,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
+ .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
.read_setup = nfs4_proc_read_setup,
- .read_pageio_init = pnfs_pageio_init_read,
- .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
.read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
- .write_pageio_init = pnfs_pageio_init_write,
- .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
@@ -6797,6 +8447,9 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
const struct xattr_handler *nfs4_xattr_handlers[] = {
&nfs4_xattr_nfs4_acl_handler,
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+ &nfs4_xattr_nfs4_label_handler,
+#endif
NULL
};
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index ebda5f4a031..e799dc3c3b1 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -23,6 +23,14 @@
#define NFSDBG_FACILITY NFSDBG_STATE
+static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
+{
+ tbl->highest_used_slotid = NFS4_NO_SLOT;
+ spin_lock_init(&tbl->slot_tbl_lock);
+ rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+ init_completion(&tbl->complete);
+}
+
/*
* nfs4_shrink_slot_table - free retired slots from the slot table
*/
@@ -44,6 +52,17 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize)
}
}
+/**
+ * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete
+ * @tbl: controlling slot table
+ *
+ */
+void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
+{
+ if (nfs4_slot_tbl_draining(tbl))
+ complete(&tbl->complete);
+}
+
/*
* nfs4_free_slot - free a slot and efficiently update slot table.
*
@@ -73,10 +92,10 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
tbl->highest_used_slotid = new_max;
else {
tbl->highest_used_slotid = NFS4_NO_SLOT;
- nfs4_session_drain_complete(tbl->session, tbl);
+ nfs4_slot_tbl_drain_complete(tbl);
}
}
- dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
+ dprintk("%s: slotid %u highest_used_slotid %u\n", __func__,
slotid, tbl->highest_used_slotid);
}
@@ -146,9 +165,9 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
ret->generation = tbl->generation;
out:
- dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
+ dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
- !IS_ERR(ret) ? ret->slot_nr : -1);
+ !IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
return ret;
}
@@ -191,7 +210,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
{
int ret;
- dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
+ dprintk("--> %s: max_reqs=%u, tbl->max_slots %u\n", __func__,
max_reqs, tbl->max_slots);
if (max_reqs > NFS4_MAX_SLOT_TABLE)
@@ -205,18 +224,45 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
spin_unlock(&tbl->slot_tbl_lock);
- dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+ dprintk("%s: tbl=%p slots=%p max_slots=%u\n", __func__,
tbl, tbl->slots, tbl->max_slots);
out:
dprintk("<-- %s: return %d\n", __func__, ret);
return ret;
}
-/* Destroy the slot table */
-static void nfs4_destroy_slot_tables(struct nfs4_session *session)
+/*
+ * nfs4_release_slot_table - release all slot table entries
+ */
+static void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+{
+ nfs4_shrink_slot_table(tbl, 0);
+}
+
+/**
+ * nfs4_shutdown_slot_table - release resources attached to a slot table
+ * @tbl: slot table to shut down
+ *
+ */
+void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl)
{
- nfs4_shrink_slot_table(&session->fc_slot_table, 0);
- nfs4_shrink_slot_table(&session->bc_slot_table, 0);
+ nfs4_release_slot_table(tbl);
+ rpc_destroy_wait_queue(&tbl->slot_tbl_waitq);
+}
+
+/**
+ * nfs4_setup_slot_table - prepare a stand-alone slot table for use
+ * @tbl: slot table to set up
+ * @max_reqs: maximum number of requests allowed
+ * @queue: name to give RPC wait queue
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs,
+ const char *queue)
+{
+ nfs4_init_slot_table(tbl, queue);
+ return nfs4_realloc_slot_table(tbl, max_reqs, 0);
}
static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
@@ -226,7 +272,7 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
struct nfs4_slot *slot = pslot;
struct nfs4_slot_table *tbl = slot->table;
- if (nfs4_session_draining(tbl->session) && !args->sa_privileged)
+ if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
return false;
slot->generation = tbl->generation;
args->sa_slot = slot;
@@ -273,6 +319,8 @@ void nfs41_wake_slot_table(struct nfs4_slot_table *tbl)
}
}
+#if defined(CONFIG_NFS_V4_1)
+
static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl,
u32 target_highest_slotid)
{
@@ -383,6 +431,12 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
spin_unlock(&tbl->slot_tbl_lock);
}
+static void nfs4_release_session_slot_tables(struct nfs4_session *session)
+{
+ nfs4_release_slot_table(&session->fc_slot_table);
+ nfs4_release_slot_table(&session->bc_slot_table);
+}
+
/*
* Initialize or reset the forechannel and backchannel tables
*/
@@ -405,43 +459,38 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
if (status && tbl->slots == NULL)
/* Fore and back channel share a connection so get
* both slot tables or neither */
- nfs4_destroy_slot_tables(ses);
+ nfs4_release_session_slot_tables(ses);
return status;
}
struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
{
struct nfs4_session *session;
- struct nfs4_slot_table *tbl;
session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
if (!session)
return NULL;
- tbl = &session->fc_slot_table;
- tbl->highest_used_slotid = NFS4_NO_SLOT;
- spin_lock_init(&tbl->slot_tbl_lock);
- rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
- init_completion(&tbl->complete);
-
- tbl = &session->bc_slot_table;
- tbl->highest_used_slotid = NFS4_NO_SLOT;
- spin_lock_init(&tbl->slot_tbl_lock);
- rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
- init_completion(&tbl->complete);
-
+ nfs4_init_slot_table(&session->fc_slot_table, "ForeChannel Slot table");
+ nfs4_init_slot_table(&session->bc_slot_table, "BackChannel Slot table");
session->session_state = 1<<NFS4_SESSION_INITING;
session->clp = clp;
return session;
}
+static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+{
+ nfs4_shutdown_slot_table(&session->fc_slot_table);
+ nfs4_shutdown_slot_table(&session->bc_slot_table);
+}
+
void nfs4_destroy_session(struct nfs4_session *session)
{
struct rpc_xprt *xprt;
struct rpc_cred *cred;
- cred = nfs4_get_exchange_id_cred(session->clp);
+ cred = nfs4_get_clid_cred(session->clp);
nfs4_proc_destroy_session(session, cred);
if (cred)
put_rpccred(cred);
@@ -452,7 +501,7 @@ void nfs4_destroy_session(struct nfs4_session *session)
dprintk("%s Destroy backchannel for xprt %p\n",
__func__, xprt);
xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
- nfs4_destroy_slot_tables(session);
+ nfs4_destroy_session_slot_tables(session);
kfree(session);
}
@@ -478,48 +527,12 @@ static int nfs41_check_session_ready(struct nfs_client *clp)
return 0;
}
-int nfs4_init_session(struct nfs_server *server)
+int nfs4_init_session(struct nfs_client *clp)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_session *session;
- unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE;
- unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE;
-
if (!nfs4_has_session(clp))
return 0;
- if (server->rsize != 0)
- target_max_resp_sz = server->rsize;
- target_max_resp_sz += nfs41_maxread_overhead;
-
- if (server->wsize != 0)
- target_max_rqst_sz = server->wsize;
- target_max_rqst_sz += nfs41_maxwrite_overhead;
-
- session = clp->cl_session;
- spin_lock(&clp->cl_lock);
- if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
- /* Initialise targets and channel attributes */
- session->fc_target_max_rqst_sz = target_max_rqst_sz;
- session->fc_attrs.max_rqst_sz = target_max_rqst_sz;
- session->fc_target_max_resp_sz = target_max_resp_sz;
- session->fc_attrs.max_resp_sz = target_max_resp_sz;
- } else {
- /* Just adjust the targets */
- if (target_max_rqst_sz > session->fc_target_max_rqst_sz) {
- session->fc_target_max_rqst_sz = target_max_rqst_sz;
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
- }
- if (target_max_resp_sz > session->fc_target_max_resp_sz) {
- session->fc_target_max_resp_sz = target_max_resp_sz;
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
- }
- }
- spin_unlock(&clp->cl_lock);
-
- if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
- nfs4_schedule_lease_recovery(clp);
-
+ clear_bit(NFS4_SESSION_INITING, &clp->cl_session->session_state);
return nfs41_check_session_ready(clp);
}
@@ -549,4 +562,4 @@ int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time)
}
EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
-
+#endif /* defined(CONFIG_NFS_V4_1) */
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 6f3cb39386d..b34ada9bc6a 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -8,7 +8,7 @@
#define __LINUX_FS_NFS_NFS4SESSION_H
/* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_DEF_SLOT_TABLE_SIZE (64U)
#define NFS4_MAX_SLOT_TABLE (1024U)
#define NFS4_NO_SLOT ((u32)-1)
@@ -25,6 +25,10 @@ struct nfs4_slot {
};
/* Sessions */
+enum nfs4_slot_tbl_state {
+ NFS4_SLOT_TBL_DRAINING,
+};
+
#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
struct nfs4_slot_table {
struct nfs4_session *session; /* Parent session */
@@ -43,6 +47,7 @@ struct nfs4_slot_table {
unsigned long generation; /* Generation counter for
target_highest_slotid */
struct completion complete;
+ unsigned long slot_tbl_state;
};
/*
@@ -61,20 +66,28 @@ struct nfs4_session {
struct nfs4_channel_attrs bc_attrs;
struct nfs4_slot_table bc_slot_table;
struct nfs_client *clp;
- /* Create session arguments */
- unsigned int fc_target_max_rqst_sz;
- unsigned int fc_target_max_resp_sz;
};
enum nfs4_session_state {
NFS4_SESSION_INITING,
- NFS4_SESSION_DRAINING,
};
-#if defined(CONFIG_NFS_V4_1)
+extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
+ unsigned int max_reqs, const char *queue);
+extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
+extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
+bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
+ struct nfs4_slot *slot);
+void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
+static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
+{
+ return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
+}
+
+#if defined(CONFIG_NFS_V4_1)
extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
u32 target_highest_slotid);
extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
@@ -85,21 +98,9 @@ extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
extern void nfs4_destroy_session(struct nfs4_session *session);
-extern int nfs4_init_session(struct nfs_server *server);
+extern int nfs4_init_session(struct nfs_client *clp);
extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
-extern void nfs4_session_drain_complete(struct nfs4_session *session,
- struct nfs4_slot_table *tbl);
-
-static inline bool nfs4_session_draining(struct nfs4_session *session)
-{
- return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
-}
-
-bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
- struct nfs4_slot *slot);
-void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
-
/*
* Determine if sessions are in use.
*/
@@ -117,9 +118,19 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
return 0;
}
+#ifdef CONFIG_CRC32
+/*
+ * nfs_session_id_hash - calculate the crc32 hash for the session id
+ * @sess_id: pointer to the session id
+ */
+#define nfs_session_id_hash(sess_id) \
+ (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
+#else
+#define nfs_session_id_hash(session) (0)
+#endif
#else /* defined(CONFIG_NFS_V4_1) */
-static inline int nfs4_init_session(struct nfs_server *server)
+static inline int nfs4_init_session(struct nfs_client *clp)
{
return 0;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41..848f6853c59 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
clp->cl_confirm = clid.confirm;
status = nfs40_walk_client_list(clp, result, cred);
- switch (status) {
- case -NFS4ERR_STALE_CLIENTID:
- set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- case 0:
+ if (status == 0) {
/* Sustain the lease, even if it's empty. If the clientid4
* goes stale it's of no use for trunking discovery. */
nfs4_schedule_state_renewal(*result);
- break;
}
-
out:
return status;
}
@@ -159,13 +154,14 @@ struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
return cred;
}
-static void nfs4_clear_machine_cred(struct nfs_client *clp)
+static void nfs4_root_machine_cred(struct nfs_client *clp)
{
- struct rpc_cred *cred;
+ struct rpc_cred *cred, *new;
+ new = rpc_lookup_machine_cred(NULL);
spin_lock(&clp->cl_lock);
cred = clp->cl_machine_cred;
- clp->cl_machine_cred = NULL;
+ clp->cl_machine_cred = new;
spin_unlock(&clp->cl_lock);
if (cred != NULL)
put_rpccred(cred);
@@ -219,67 +215,36 @@ out:
return cred;
}
-#if defined(CONFIG_NFS_V4_1)
-
-static int nfs41_setup_state_renewal(struct nfs_client *clp)
+static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
{
- int status;
- struct nfs_fsinfo fsinfo;
-
- if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
- nfs4_schedule_state_renewal(clp);
- return 0;
- }
-
- status = nfs4_proc_get_lease_time(clp, &fsinfo);
- if (status == 0) {
- /* Update lease time and schedule renewal */
- spin_lock(&clp->cl_lock);
- clp->cl_lease_time = fsinfo.lease_time * HZ;
- clp->cl_last_renewal = jiffies;
- spin_unlock(&clp->cl_lock);
-
- nfs4_schedule_state_renewal(clp);
+ if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
+ spin_lock(&tbl->slot_tbl_lock);
+ nfs41_wake_slot_table(tbl);
+ spin_unlock(&tbl->slot_tbl_lock);
}
-
- return status;
}
-/*
- * Back channel returns NFS4ERR_DELAY for new requests when
- * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
- * is ended.
- */
static void nfs4_end_drain_session(struct nfs_client *clp)
{
struct nfs4_session *ses = clp->cl_session;
- struct nfs4_slot_table *tbl;
- if (ses == NULL)
+ if (clp->cl_slot_tbl) {
+ nfs4_end_drain_slot_table(clp->cl_slot_tbl);
return;
- tbl = &ses->fc_slot_table;
- if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
- spin_lock(&tbl->slot_tbl_lock);
- nfs41_wake_slot_table(tbl);
- spin_unlock(&tbl->slot_tbl_lock);
}
-}
-/*
- * Signal state manager thread if session fore channel is drained
- */
-void nfs4_session_drain_complete(struct nfs4_session *session,
- struct nfs4_slot_table *tbl)
-{
- if (nfs4_session_draining(session))
- complete(&tbl->complete);
+ if (ses != NULL) {
+ nfs4_end_drain_slot_table(&ses->bc_slot_table);
+ nfs4_end_drain_slot_table(&ses->fc_slot_table);
+ }
}
-static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl)
+static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
{
+ set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
spin_lock(&tbl->slot_tbl_lock);
if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
- INIT_COMPLETION(tbl->complete);
+ reinit_completion(&tbl->complete);
spin_unlock(&tbl->slot_tbl_lock);
return wait_for_completion_interruptible(&tbl->complete);
}
@@ -292,13 +257,41 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
struct nfs4_session *ses = clp->cl_session;
int ret = 0;
- set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ if (clp->cl_slot_tbl)
+ return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
+
/* back channel */
- ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table);
+ ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
if (ret)
return ret;
/* fore channel */
- return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
+ return nfs4_drain_slot_tbl(&ses->fc_slot_table);
+}
+
+#if defined(CONFIG_NFS_V4_1)
+
+static int nfs41_setup_state_renewal(struct nfs_client *clp)
+{
+ int status;
+ struct nfs_fsinfo fsinfo;
+
+ if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
+ nfs4_schedule_state_renewal(clp);
+ return 0;
+ }
+
+ status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ if (status == 0) {
+ /* Update lease time and schedule renewal */
+ spin_lock(&clp->cl_lock);
+ clp->cl_lease_time = fsinfo.lease_time * HZ;
+ clp->cl_last_renewal = jiffies;
+ spin_unlock(&clp->cl_lock);
+
+ nfs4_schedule_state_renewal(clp);
+ }
+
+ return status;
}
static void nfs41_finish_session_reset(struct nfs_client *clp)
@@ -358,62 +351,21 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
return nfs41_walk_client_list(clp, result, cred);
}
-struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
-{
- struct rpc_cred *cred;
-
- spin_lock(&clp->cl_lock);
- cred = nfs4_get_machine_cred_locked(clp);
- spin_unlock(&clp->cl_lock);
- return cred;
-}
-
#endif /* CONFIG_NFS_V4_1 */
-static struct rpc_cred *
-nfs4_get_setclientid_cred_server(struct nfs_server *server)
-{
- struct nfs_client *clp = server->nfs_client;
- struct rpc_cred *cred = NULL;
- struct nfs4_state_owner *sp;
- struct rb_node *pos;
-
- spin_lock(&clp->cl_lock);
- pos = rb_first(&server->state_owners);
- if (pos != NULL) {
- sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
- cred = get_rpccred(sp->so_cred);
- }
- spin_unlock(&clp->cl_lock);
- return cred;
-}
-
/**
- * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * nfs4_get_clid_cred - Acquire credential for a setclientid or exchange_id operation
* @clp: client state handle
*
* Returns an rpc_cred with reference count bumped, or NULL.
*/
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp)
{
- struct nfs_server *server;
struct rpc_cred *cred;
spin_lock(&clp->cl_lock);
cred = nfs4_get_machine_cred_locked(clp);
spin_unlock(&clp->cl_lock);
- if (cred != NULL)
- goto out;
-
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- cred = nfs4_get_setclientid_cred_server(server);
- if (cred != NULL)
- break;
- }
- rcu_read_unlock();
-
-out:
return cred;
}
@@ -523,6 +475,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
nfs4_init_seqid_counter(&sp->so_seqid);
atomic_set(&sp->so_count, 1);
INIT_LIST_HEAD(&sp->so_lru);
+ seqcount_init(&sp->so_reclaim_seqcount);
+ mutex_init(&sp->so_delegreturn_mutex);
return sp;
}
@@ -702,6 +656,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
list_for_each_entry(state, &nfsi->open_states, inode_states) {
if (state->owner != owner)
continue;
+ if (!nfs4_valid_open_stateid(state))
+ continue;
if (atomic_inc_not_zero(&state->count))
return state;
}
@@ -934,6 +890,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
*/
void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
{
+ struct nfs_server *server;
struct nfs4_state *state;
if (lsp == NULL)
@@ -945,11 +902,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags);
spin_unlock(&state->state_lock);
+ server = state->owner->so_server;
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
- if (nfs4_release_lockowner(lsp) == 0)
- return;
- }
- nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp);
+ struct nfs_client *clp = server->nfs_client;
+
+ clp->cl_mvops->free_lock_state(server, lsp);
+ } else
+ nfs4_free_lock_state(server, lsp);
}
static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -990,13 +949,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
return 0;
}
-static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
+ struct nfs4_state *state,
const struct nfs_lockowner *lockowner)
{
struct nfs4_lock_state *lsp;
fl_owner_t fl_owner;
pid_t fl_pid;
- bool ret = false;
+ int ret = -ENOENT;
if (lockowner == NULL)
@@ -1009,9 +969,11 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
fl_pid = lockowner->l_pid;
spin_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
- if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
+ if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
+ ret = -EIO;
+ else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
nfs4_stateid_copy(dst, &lsp->ls_stateid);
- ret = true;
+ ret = 0;
}
spin_unlock(&state->state_lock);
nfs4_put_lock_state(lsp);
@@ -1021,11 +983,15 @@ out:
static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
{
+ const nfs4_stateid *src;
int seq;
do {
+ src = &zero_stateid;
seq = read_seqbegin(&state->seqlock);
- nfs4_stateid_copy(dst, &state->stateid);
+ if (test_bit(NFS_OPEN_STATE, &state->flags))
+ src = &state->open_stateid;
+ nfs4_stateid_copy(dst, src);
} while (read_seqretry(&state->seqlock, seq));
}
@@ -1033,14 +999,30 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
* Byte-range lock aware utility to initialize the stateid of read/write
* requests.
*/
-void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
+int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
fmode_t fmode, const struct nfs_lockowner *lockowner)
{
- if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
- return;
- if (nfs4_copy_lock_stateid(dst, state, lockowner))
- return;
+ int ret = nfs4_copy_lock_stateid(dst, state, lockowner);
+ if (ret == -EIO)
+ /* A lost lock - don't even consider delegations */
+ goto out;
+ /* returns true if delegation stateid found and copied */
+ if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) {
+ ret = 0;
+ goto out;
+ }
+ if (ret != -ENOENT)
+ /* nfs4_copy_delegation_stateid() didn't over-write
+ * dst, so it still has the lock stateid which we now
+ * choose to use.
+ */
+ goto out;
nfs4_copy_open_stateid(dst, state);
+ ret = 0;
+out:
+ if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
+ dst->seqid = 0;
+ return ret;
}
struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
@@ -1084,7 +1066,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
/*
* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
* failed with a seqid incrementing error -
- * see comments nfs_fs.h:seqid_mutating_error()
+ * see comments nfs4.h:seqid_mutating_error()
*/
static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
{
@@ -1129,7 +1111,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
/*
* Increment the seqid if the LOCK/LOCKU succeeded, or
* failed with a seqid incrementing error -
- * see comments nfs_fs.h:seqid_mutating_error()
+ * see comments nfs4.h:seqid_mutating_error()
*/
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
@@ -1158,9 +1140,9 @@ static int nfs4_run_state_manager(void *);
static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
{
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
rpc_wake_up(&clp->cl_rpcwaitq);
}
@@ -1185,7 +1167,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
snprintf(buf, sizeof(buf), "%s-manager",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
rcu_read_unlock();
- task = kthread_run(nfs4_run_state_manager, clp, buf);
+ task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
if (IS_ERR(task)) {
printk(KERN_ERR "%s: kthread_run: %ld\n",
__func__, PTR_ERR(task));
@@ -1210,20 +1192,74 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
+/**
+ * nfs4_schedule_migration_recovery - trigger migration recovery
+ *
+ * @server: FSID that is migrating
+ *
+ * Returns zero if recovery has started, otherwise a negative NFS4ERR
+ * value is returned.
+ */
+int nfs4_schedule_migration_recovery(const struct nfs_server *server)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
+ pr_err("NFS: volatile file handles not supported (server %s)\n",
+ clp->cl_hostname);
+ return -NFS4ERR_IO;
+ }
+
+ if (test_bit(NFS_MIG_FAILED, &server->mig_status))
+ return -NFS4ERR_IO;
+
+ dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
+ __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+
+ set_bit(NFS_MIG_IN_TRANSITION,
+ &((struct nfs_server *)server)->mig_status);
+ set_bit(NFS4CLNT_MOVED, &clp->cl_state);
+
+ nfs4_schedule_state_manager(clp);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
+
+/**
+ * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
+ *
+ * @clp: server to check for moved leases
+ *
+ */
+void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
+{
+ dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
+ __func__, clp->cl_clientid, clp->cl_hostname);
+
+ set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
+ nfs4_schedule_state_manager(clp);
+}
+EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
+
int nfs4_wait_clnt_recover(struct nfs_client *clp)
{
int res;
might_sleep();
+ atomic_inc(&clp->cl_count);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
- return res;
-
+ goto out;
if (clp->cl_cons_state < 0)
- return clp->cl_cons_state;
- return 0;
+ res = clp->cl_cons_state;
+out:
+ nfs_put_client(clp);
+ return res;
}
int nfs4_client_recover_expired_lease(struct nfs_client *clp)
@@ -1280,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st
return 1;
}
-static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1289,14 +1325,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s
return 1;
}
-void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
{
struct nfs_client *clp = server->nfs_client;
+ if (!nfs4_valid_open_stateid(state))
+ return -EBADF;
nfs4_state_mark_reclaim_nograce(clp, state);
dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
clp->cl_hostname);
nfs4_schedule_state_manager(clp);
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
@@ -1326,6 +1365,27 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
nfs4_schedule_state_manager(clp);
}
+static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
+{
+ struct inode *inode = state->inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ if (ctx->state != state)
+ continue;
+ set_bit(NFS_CONTEXT_BAD, &ctx->flags);
+ }
+ spin_unlock(&inode->i_lock);
+}
+
+static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
+{
+ set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
+ nfs4_state_mark_open_context_bad(state);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
@@ -1340,13 +1400,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* Guard against delegation returns and new lock/unlock calls */
down_write(&nfsi->rwsem);
/* Protect inode->i_flock using the BKL */
- lock_flocks();
+ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue;
if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
status = ops->recover_lock(state, fl);
switch (status) {
case 0:
@@ -1364,8 +1424,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
goto out;
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d. "
- "Zeroing state\n", __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n",
+ __func__, status);
case -ENOMEM:
case -NFS4ERR_DENIED:
case -NFS4ERR_RECLAIM_BAD:
@@ -1373,9 +1433,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* kill_proc(fl->fl_pid, SIGLOST, 1); */
status = 0;
}
- lock_flocks();
+ spin_lock(&inode->i_lock);
}
- unlock_flocks();
+ spin_unlock(&inode->i_lock);
out:
up_write(&nfsi->rwsem);
return status;
@@ -1395,11 +1455,14 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
* recovering after a network partition or a reboot from a
* server that doesn't support a grace period.
*/
-restart:
spin_lock(&sp->so_lock);
+ raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
+restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
continue;
+ if (!nfs4_valid_open_stateid(state))
+ continue;
if (state->state == 0)
continue;
atomic_inc(&state->count);
@@ -1408,34 +1471,33 @@ restart:
if (status >= 0) {
status = nfs4_reclaim_locks(state, ops);
if (status >= 0) {
- spin_lock(&state->state_lock);
- list_for_each_entry(lock, &state->lock_states, ls_locks) {
- if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
- pr_warn_ratelimited("NFS: "
- "%s: Lock reclaim "
- "failed!\n", __func__);
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+ spin_lock(&state->state_lock);
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
+ pr_warn_ratelimited("NFS: "
+ "%s: Lock reclaim "
+ "failed!\n", __func__);
+ }
+ spin_unlock(&state->state_lock);
}
- spin_unlock(&state->state_lock);
nfs4_put_open_state(state);
+ spin_lock(&sp->so_lock);
goto restart;
}
}
switch (status) {
default:
- printk(KERN_ERR "NFS: %s: unhandled error %d. "
- "Zeroing state\n", __func__, status);
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n",
+ __func__, status);
case -ENOENT:
case -ENOMEM:
case -ESTALE:
- /*
- * Open state on this file cannot be recovered
- * All we can do is revert to using the zero stateid.
- */
- memset(&state->stateid, 0,
- sizeof(state->stateid));
- /* Mark the file as being 'closed' */
- state->state = 0;
+ /* Open state on this file cannot be recovered */
+ nfs4_state_mark_recovery_failed(state, status);
break;
+ case -EAGAIN:
+ ssleep(1);
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
@@ -1454,12 +1516,17 @@ restart:
goto out_err;
}
nfs4_put_open_state(state);
+ spin_lock(&sp->so_lock);
goto restart;
}
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return 0;
out_err:
nfs4_put_open_state(state);
+ spin_lock(&sp->so_lock);
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
+ spin_unlock(&sp->so_lock);
return status;
}
@@ -1522,11 +1589,12 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
}
static void nfs4_reclaim_complete(struct nfs_client *clp,
- const struct nfs4_state_recovery_ops *ops)
+ const struct nfs4_state_recovery_ops *ops,
+ struct rpc_cred *cred)
{
/* Notify the server we're done reclaiming our state */
if (ops->reclaim_complete)
- (void)ops->reclaim_complete(clp);
+ (void)ops->reclaim_complete(clp, cred);
}
static void nfs4_clear_reclaim_server(struct nfs_server *server)
@@ -1571,9 +1639,15 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
{
+ const struct nfs4_state_recovery_ops *ops;
+ struct rpc_cred *cred;
+
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
- nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ ops = clp->cl_mvops->reboot_recovery_ops;
+ cred = nfs4_get_clid_cred(clp);
+ nfs4_reclaim_complete(clp, ops, cred);
+ put_rpccred(cred);
}
static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1600,7 +1674,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
nfs4_state_end_reclaim_reboot(clp);
break;
case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_LEASE_MOVED:
set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
@@ -1685,13 +1758,17 @@ static int nfs4_check_lease(struct nfs_client *clp)
cred = ops->get_state_renewal_cred_locked(clp);
spin_unlock(&clp->cl_lock);
if (cred == NULL) {
- cred = nfs4_get_setclientid_cred(clp);
+ cred = nfs4_get_clid_cred(clp);
status = -ENOKEY;
if (cred == NULL)
goto out;
}
status = ops->renew_lease(clp, cred);
put_rpccred(cred);
+ if (status == -ETIMEDOUT) {
+ set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ return 0;
+ }
out:
return nfs4_recovery_handle_error(clp, status);
}
@@ -1721,10 +1798,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
return -EPERM;
case -EACCES:
- if (clp->cl_machine_cred == NULL)
- return -EACCES;
- /* Handle case where the user hasn't set up machine creds */
- nfs4_clear_machine_cred(clp);
case -NFS4ERR_DELAY:
case -ETIMEDOUT:
case -EAGAIN:
@@ -1757,7 +1830,7 @@ static int nfs4_establish_lease(struct nfs_client *clp)
clp->cl_mvops->reboot_recovery_ops;
int status;
- cred = ops->get_clid_cred(clp);
+ cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
return -ENOENT;
status = ops->establish_clid(clp, cred);
@@ -1801,6 +1874,168 @@ static int nfs4_purge_lease(struct nfs_client *clp)
return 0;
}
+/*
+ * Try remote migration of one FSID from a source server to a
+ * destination server. The source server provides a list of
+ * potential destinations.
+ *
+ * The list is fetched with nfs4_proc_get_locations() on the root inode
+ * of server->super; the transport is then switched with
+ * nfs4_replace_transport(). Any non-zero result is logged with pr_err()
+ * and sets NFS_MIG_FAILED in server->mig_status.
+ *
+ * Returns zero or a negative NFS4ERR status code.
+ * Note that a memory allocation failure is reported as zero.
+ */
+static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_fs_locations *locations = NULL;
+ struct inode *inode;
+ struct page *page;
+ int status, result;
+
+ dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
+ (unsigned long long)server->fsid.major,
+ (unsigned long long)server->fsid.minor,
+ clp->cl_hostname);
+
+ result = 0; /* stays 0 on the allocation-failure path just below */
+ page = alloc_page(GFP_KERNEL);
+ locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+ if (page == NULL || locations == NULL) {
+ dprintk("<-- %s: no memory\n", __func__);
+ goto out; /* returns 0 and does not set NFS_MIG_FAILED */
+ }
+
+ inode = server->super->s_root->d_inode;
+ result = nfs4_proc_get_locations(inode, locations, page, cred);
+ if (result) {
+ dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
+ __func__, result);
+ goto out;
+ }
+
+ result = -NFS4ERR_NXIO; /* default for both failure exits below */
+ if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+ dprintk("<-- %s: No fs_locations data, migration skipped\n",
+ __func__);
+ goto out;
+ }
+
+ nfs4_begin_drain_session(clp); /* NOTE(review): no matching end-of-drain call is visible in this function */
+
+ status = nfs4_replace_transport(server, locations);
+ if (status != 0) {
+ dprintk("<-- %s: failed to replace transport: %d\n",
+ __func__, status);
+ goto out; /* status is only logged; the caller gets -NFS4ERR_NXIO */
+ }
+
+ result = 0;
+ dprintk("<-- %s: migration succeeded\n", __func__);
+
+out:
+ if (page != NULL)
+ __free_page(page);
+ kfree(locations);
+ if (result) {
+ pr_err("NFS: migration recovery failed (server %s)\n",
+ clp->cl_hostname);
+ set_bit(NFS_MIG_FAILED, &server->mig_status);
+ }
+ return result;
+}
+
+/*
+ * Walk clp->cl_superblocks and call nfs4_try_migration() for every
+ * nfs_server whose NFS_MIG_IN_TRANSITION bit is set (the bit is cleared
+ * as it is tested). The walk stops at the first negative status.
+ *
+ * Returns zero or a negative NFS4ERR status code.
+ */
+static int nfs4_handle_migration(struct nfs_client *clp)
+{
+ const struct nfs4_state_maintenance_ops *ops =
+ clp->cl_mvops->state_renewal_ops;
+ struct nfs_server *server;
+ struct rpc_cred *cred;
+
+ dprintk("%s: migration reported on \"%s\"\n", __func__,
+ clp->cl_hostname);
+
+ spin_lock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
+ if (cred == NULL)
+ return -NFS4ERR_NOENT;
+
+ clp->cl_mig_gen++; /* new pass: servers stamped with this value are skipped */
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ int status;
+
+ if (server->mig_gen == clp->cl_mig_gen)
+ continue; /* already visited in this pass */
+ server->mig_gen = clp->cl_mig_gen;
+
+ if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
+ &server->mig_status))
+ continue;
+
+ rcu_read_unlock(); /* dropped before calling nfs4_try_migration() */
+ status = nfs4_try_migration(server, cred);
+ if (status < 0) {
+ put_rpccred(cred);
+ return status;
+ }
+ goto restart; /* rescan from the list head; visited servers are skipped via mig_gen */
+ }
+ rcu_read_unlock();
+ put_rpccred(cred);
+ return 0;
+}
+
+/*
+ * Test each nfs_server on the clp's cl_superblocks list to see
+ * if it's moved to another server. Stop when the server no longer
+ * returns NFS4ERR_LEASE_MOVED.
+ *
+ * Returns -NFS4ERR_NOENT when no state renewal credential is available,
+ * otherwise zero; the results of the per-server calls are not propagated.
+ */
+static int nfs4_handle_lease_moved(struct nfs_client *clp)
+{
+ const struct nfs4_state_maintenance_ops *ops =
+ clp->cl_mvops->state_renewal_ops;
+ struct nfs_server *server;
+ struct rpc_cred *cred;
+
+ dprintk("%s: lease moved reported on \"%s\"\n", __func__,
+ clp->cl_hostname);
+
+ spin_lock(&clp->cl_lock);
+ cred = ops->get_state_renewal_cred_locked(clp);
+ spin_unlock(&clp->cl_lock);
+ if (cred == NULL)
+ return -NFS4ERR_NOENT;
+
+ clp->cl_mig_gen++; /* new pass: servers stamped with this value are skipped */
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ struct inode *inode;
+ int status;
+
+ if (server->mig_gen == clp->cl_mig_gen)
+ continue; /* already probed in this pass */
+ server->mig_gen = clp->cl_mig_gen;
+
+ rcu_read_unlock(); /* dropped before the calls below */
+
+ inode = server->super->s_root->d_inode;
+ status = nfs4_proc_fsid_present(inode, cred);
+ if (status != -NFS4ERR_MOVED)
+ goto restart; /* wasn't this one */
+ if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
+ goto restart; /* there are more */
+ goto out; /* RCU read lock is not held here */
+ }
+ rcu_read_unlock();
+
+out:
+ put_rpccred(cred);
+ return 0;
+}
+
/**
* nfs4_discover_server_trunking - Detect server IP address trunking
*
@@ -1819,32 +2054,19 @@ int nfs4_discover_server_trunking(struct nfs_client *clp,
{
const struct nfs4_state_recovery_ops *ops =
clp->cl_mvops->reboot_recovery_ops;
- rpc_authflavor_t *flavors, flav, save;
struct rpc_clnt *clnt;
struct rpc_cred *cred;
- int i, len, status;
+ int i, status;
dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
- len = NFS_MAX_SECFLAVORS;
- flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL);
- if (flavors == NULL) {
- status = -ENOMEM;
- goto out;
- }
- len = rpcauth_list_flavors(flavors, len);
- if (len < 0) {
- status = len;
- goto out_free;
- }
clnt = clp->cl_rpcclient;
- save = clnt->cl_auth->au_flavor;
i = 0;
mutex_lock(&nfs_clid_init_mutex);
- status = -ENOENT;
again:
- cred = ops->get_clid_cred(clp);
+ status = -ENOENT;
+ cred = nfs4_get_clid_cred(clp);
if (cred == NULL)
goto out_unlock;
@@ -1853,35 +2075,42 @@ again:
switch (status) {
case 0:
break;
-
- case -EACCES:
- if (clp->cl_machine_cred == NULL)
+ case -ETIMEDOUT:
+ if (clnt->cl_softrtry)
break;
- /* Handle case where the user hasn't set up machine creds */
- nfs4_clear_machine_cred(clp);
case -NFS4ERR_DELAY:
- case -ETIMEDOUT:
case -EAGAIN:
ssleep(1);
+ case -NFS4ERR_STALE_CLIENTID:
dprintk("NFS: %s after status %d, retrying\n",
__func__, status);
goto again;
-
+ case -EACCES:
+ if (i++ == 0) {
+ nfs4_root_machine_cred(clp);
+ goto again;
+ }
+ if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ break;
case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_WRONGSEC:
- status = -EPERM;
- if (i >= len)
+ /* No point in retrying if we already used RPC_AUTH_UNIX */
+ if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
+ status = -EPERM;
break;
-
- flav = flavors[i++];
- if (flav == save)
- flav = flavors[i++];
- clnt = rpc_clone_client_set_auth(clnt, flav);
+ }
+ clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
if (IS_ERR(clnt)) {
status = PTR_ERR(clnt);
break;
}
- clp->cl_rpcclient = clnt;
+ /* Note: this is safe because we haven't yet marked the
+ * client as ready, so we are the only user of
+ * clp->cl_rpcclient
+ */
+ clnt = xchg(&clp->cl_rpcclient, clnt);
+ rpc_shutdown_client(clnt);
+ clnt = clp->cl_rpcclient;
goto again;
case -NFS4ERR_MINOR_VERS_MISMATCH:
@@ -1892,13 +2121,15 @@ again:
case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
* in nfs4_exchange_id */
status = -EKEYEXPIRED;
+ break;
+ default:
+ pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n",
+ __func__, status);
+ status = -EIO;
}
out_unlock:
mutex_unlock(&nfs_clid_init_mutex);
-out_free:
- kfree(flavors);
-out:
dprintk("NFS: %s: status = %d\n", __func__, status);
return status;
}
@@ -2000,9 +2231,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
nfs41_handle_server_reboot(clp);
if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
- SEQ4_STATUS_ADMIN_STATE_REVOKED |
- SEQ4_STATUS_LEASE_MOVED))
+ SEQ4_STATUS_ADMIN_STATE_REVOKED))
nfs41_handle_state_revoked(clp);
+ if (flags & SEQ4_STATUS_LEASE_MOVED)
+ nfs4_schedule_lease_moved_recovery(clp);
if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
@@ -2020,10 +2252,20 @@ static int nfs4_reset_session(struct nfs_client *clp)
if (!nfs4_has_session(clp))
return 0;
nfs4_begin_drain_session(clp);
- cred = nfs4_get_exchange_id_cred(clp);
+ cred = nfs4_get_clid_cred(clp);
status = nfs4_proc_destroy_session(clp->cl_session, cred);
- if (status && status != -NFS4ERR_BADSESSION &&
- status != -NFS4ERR_DEADSESSION) {
+ switch (status) {
+ case 0:
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ break;
+ case -NFS4ERR_BACK_CHAN_BUSY:
+ case -NFS4ERR_DELAY:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ status = 0;
+ ssleep(1);
+ goto out;
+ default:
status = nfs4_recovery_handle_error(clp, status);
goto out;
}
@@ -2053,7 +2295,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
if (!nfs4_has_session(clp))
return 0;
nfs4_begin_drain_session(clp);
- cred = nfs4_get_exchange_id_cred(clp);
+ cred = nfs4_get_clid_cred(clp);
ret = nfs4_proc_bind_conn_to_session(clp, cred);
if (cred)
put_rpccred(cred);
@@ -2074,7 +2316,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
}
#else /* CONFIG_NFS_V4_1 */
static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
-static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
@@ -2131,7 +2372,20 @@ static void nfs4_state_manager(struct nfs_client *clp)
status = nfs4_check_lease(clp);
if (status < 0)
goto out_error;
- continue;
+ }
+
+ if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
+ section = "migration";
+ status = nfs4_handle_migration(clp);
+ if (status < 0)
+ goto out_error;
+ }
+
+ if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
+ section = "lease moved";
+ status = nfs4_handle_lease_moved(clp);
+ if (status < 0)
+ goto out_error;
}
/* First recover reboot state... */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 84d2e9e2f31..6f340f02f2b 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -9,6 +9,7 @@
#include "delegation.h"
#include "internal.h"
#include "nfs4_fs.h"
+#include "dns_resolve.h"
#include "pnfs.h"
#include "nfs.h"
@@ -28,7 +29,7 @@ static struct file_system_type nfs4_remote_fs_type = {
.name = "nfs4",
.mount = nfs4_remote_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static struct file_system_type nfs4_remote_referral_fs_type = {
@@ -36,7 +37,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
.name = "nfs4",
.mount = nfs4_remote_referral_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
struct file_system_type nfs4_referral_fs_type = {
@@ -44,7 +45,7 @@ struct file_system_type nfs4_referral_fs_type = {
.name = "nfs4",
.mount = nfs4_referral_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
static const struct super_operations nfs4_sops = {
@@ -76,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret = nfs_write_inode(inode, wbc);
- if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
- int status;
- bool sync = true;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- sync = false;
-
- status = pnfs_layoutcommit_inode(inode, sync);
- if (status < 0)
- return status;
- }
+ if (ret == 0)
+ ret = pnfs_layoutcommit_inode(inode,
+ wbc->sync_mode == WB_SYNC_ALL);
return ret;
}
@@ -97,7 +90,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
*/
static void nfs4_evict_inode(struct inode *inode)
{
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
pnfs_return_layout(inode);
pnfs_destroy_layout(NFS_I(inode));
@@ -260,9 +253,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name,
res = nfs_follow_remote_path(root_mnt, export_path);
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
+ PTR_ERR_OR_ZERO(res),
+ IS_ERR(res) ? " [error]" : "");
return res;
}
@@ -318,9 +311,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
data->mnt_path = export_path;
res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
+ dprintk("<-- nfs4_referral_mount() = %d%s\n",
+ PTR_ERR_OR_ZERO(res),
+ IS_ERR(res) ? " [error]" : "");
return res;
}
@@ -329,18 +322,24 @@ static int __init init_nfs_v4(void)
{
int err;
- err = nfs_idmap_init();
+ err = nfs_dns_resolver_init();
if (err)
goto out;
- err = nfs4_register_sysctl();
+ err = nfs_idmap_init();
if (err)
goto out1;
+ err = nfs4_register_sysctl();
+ if (err)
+ goto out2;
+
register_nfs_version(&nfs_v4);
return 0;
-out1:
+out2:
nfs_idmap_quit();
+out1:
+ nfs_dns_resolver_destroy();
out:
return err;
}
@@ -350,6 +349,7 @@ static void __exit exit_nfs_v4(void)
unregister_nfs_version(&nfs_v4);
nfs4_unregister_sysctl();
nfs_idmap_quit();
+ nfs_dns_resolver_destroy();
}
MODULE_LICENSE("GPL");
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 2628d921b7e..b6ebe7e445f 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
static struct ctl_table_header *nfs4_callback_sysctl_table;
-static ctl_table nfs4_cb_sysctls[] = {
+static struct ctl_table nfs4_cb_sysctls[] = {
{
.procname = "nfs_callback_tcpport",
.data = &nfs_callback_set_tcpport,
@@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_dir[] = {
+static struct ctl_table nfs4_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs4_cb_sysctl_root[] = {
+static struct ctl_table nfs4_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
new file mode 100644
index 00000000000..d774335cc8b
--- /dev/null
+++ b/fs/nfs/nfs4trace.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
+#include "internal.h"
+#include "nfs4session.h"
+#include "callback.h"
+
+#define CREATE_TRACE_POINTS
+#include "nfs4trace.h"
+
+#ifdef CONFIG_NFS_V4_1
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds);
+#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
new file mode 100644
index 00000000000..0a744f3a86f
--- /dev/null
+++ b/fs/nfs/nfs4trace.h
@@ -0,0 +1,1148 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nfs4
+
+#if !defined(_TRACE_NFS4_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NFS4_H
+
+#include <linux/tracepoint.h>
+
+#define show_nfsv4_errors(error) /* map a status value to a short name via __print_symbolic(); keys are negated errno / NFS4ERR_* constants */ \
+ __print_symbolic(error, \
+ { NFS4_OK, "OK" }, \
+ /* Mapped by nfs4_stat_to_errno() */ \
+ { -EPERM, "EPERM" }, \
+ { -ENOENT, "ENOENT" }, \
+ { -EIO, "EIO" }, \
+ { -ENXIO, "ENXIO" }, \
+ { -EACCES, "EACCES" }, \
+ { -EEXIST, "EEXIST" }, \
+ { -EXDEV, "EXDEV" }, \
+ { -ENOTDIR, "ENOTDIR" }, \
+ { -EISDIR, "EISDIR" }, \
+ { -EFBIG, "EFBIG" }, \
+ { -ENOSPC, "ENOSPC" }, \
+ { -EROFS, "EROFS" }, \
+ { -EMLINK, "EMLINK" }, \
+ { -ENAMETOOLONG, "ENAMETOOLONG" }, \
+ { -ENOTEMPTY, "ENOTEMPTY" }, \
+ { -EDQUOT, "EDQUOT" }, \
+ { -ESTALE, "ESTALE" }, \
+ { -EBADHANDLE, "EBADHANDLE" }, \
+ { -EBADCOOKIE, "EBADCOOKIE" }, \
+ { -ENOTSUPP, "ENOTSUPP" }, \
+ { -ETOOSMALL, "ETOOSMALL" }, \
+ { -EREMOTEIO, "EREMOTEIO" }, \
+ { -EBADTYPE, "EBADTYPE" }, \
+ { -EAGAIN, "EAGAIN" }, \
+ { -ELOOP, "ELOOP" }, \
+ { -EOPNOTSUPP, "EOPNOTSUPP" }, \
+ { -EDEADLK, "EDEADLK" }, \
+ /* RPC errors */ \
+ { -ENOMEM, "ENOMEM" }, \
+ { -EKEYEXPIRED, "EKEYEXPIRED" }, \
+ { -ETIMEDOUT, "ETIMEDOUT" }, \
+ { -ERESTARTSYS, "ERESTARTSYS" }, \
+ { -ECONNREFUSED, "ECONNREFUSED" }, \
+ { -ECONNRESET, "ECONNRESET" }, \
+ { -ENETUNREACH, "ENETUNREACH" }, \
+ { -EHOSTUNREACH, "EHOSTUNREACH" }, \
+ { -EHOSTDOWN, "EHOSTDOWN" }, \
+ { -EPIPE, "EPIPE" }, \
+ { -EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+ { -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+ /* NFSv4 native errors */ \
+ { -NFS4ERR_ACCESS, "ACCESS" }, \
+ { -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+ { -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+ { -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+ { -NFS4ERR_BADCHAR, "BADCHAR" }, \
+ { -NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+ { -NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+ { -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+ { -NFS4ERR_BADLABEL, "BADLABEL" }, \
+ { -NFS4ERR_BADNAME, "BADNAME" }, \
+ { -NFS4ERR_BADOWNER, "BADOWNER" }, \
+ { -NFS4ERR_BADSESSION, "BADSESSION" }, \
+ { -NFS4ERR_BADSLOT, "BADSLOT" }, \
+ { -NFS4ERR_BADTYPE, "BADTYPE" }, \
+ { -NFS4ERR_BADXDR, "BADXDR" }, \
+ { -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+ { -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+ { -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+ { -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+ { -NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+ { -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+ { -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+ { -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+ { -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+ { -NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+ { -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
+ "CONN_NOT_BOUND_TO_SESSION" }, \
+ { -NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+ { -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+ { -NFS4ERR_DELAY, "DELAY" }, \
+ { -NFS4ERR_DELEG_ALREADY_WANTED, \
+ "DELEG_ALREADY_WANTED" }, \
+ { -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+ { -NFS4ERR_DENIED, "DENIED" }, \
+ { -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+ { -NFS4ERR_DQUOT, "DQUOT" }, \
+ { -NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+ { -NFS4ERR_EXIST, "EXIST" }, \
+ { -NFS4ERR_EXPIRED, "EXPIRED" }, \
+ { -NFS4ERR_FBIG, "FBIG" }, \
+ { -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+ { -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+ { -NFS4ERR_GRACE, "GRACE" }, \
+ { -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+ { -NFS4ERR_INVAL, "INVAL" }, \
+ { -NFS4ERR_IO, "IO" }, \
+ { -NFS4ERR_ISDIR, "ISDIR" }, \
+ { -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+ { -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+ { -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+ { -NFS4ERR_LOCKED, "LOCKED" }, \
+ { -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+ { -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+ { -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+ { -NFS4ERR_MLINK, "MLINK" }, \
+ { -NFS4ERR_MOVED, "MOVED" }, \
+ { -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+ { -NFS4ERR_NOENT, "NOENT" }, \
+ { -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+ { -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+ { -NFS4ERR_NOSPC, "NOSPC" }, \
+ { -NFS4ERR_NOTDIR, "NOTDIR" }, \
+ { -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+ { -NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+ { -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+ { -NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+ { -NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+ { -NFS4ERR_NXIO, "NXIO" }, \
+ { -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+ { -NFS4ERR_OPENMODE, "OPENMODE" }, \
+ { -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+ { -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+ { -NFS4ERR_PERM, "PERM" }, \
+ { -NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+ { -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+ { -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+ { -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+ { -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+ { -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+ { -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+ { -NFS4ERR_REP_TOO_BIG_TO_CACHE, \
+ "REP_TOO_BIG_TO_CACHE" }, \
+ { -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+ { -NFS4ERR_RESOURCE, "RESOURCE" }, \
+ { -NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+ { -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+ { -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+ { -NFS4ERR_ROFS, "ROFS" }, \
+ { -NFS4ERR_SAME, "SAME" }, \
+ { -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+ { -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+ { -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+ { -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+ { -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+ { -NFS4ERR_STALE, "STALE" }, \
+ { -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+ { -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+ { -NFS4ERR_SYMLINK, "SYMLINK" }, \
+ { -NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+ { -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+ { -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+ { -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+ { -NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+ { -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+ { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+ { -NFS4ERR_XDEV, "XDEV" })
+
+#define show_open_flags(flags) /* render the O_CREAT/O_EXCL/O_TRUNC/O_DIRECT bits of flags as a "|"-separated list; other bits have no name here */ \
+ __print_flags(flags, "|", \
+ { O_CREAT, "O_CREAT" }, \
+ { O_EXCL, "O_EXCL" }, \
+ { O_TRUNC, "O_TRUNC" }, \
+ { O_DIRECT, "O_DIRECT" })
+
+#define show_fmode_flags(mode) /* render the READ/WRITE/EXEC bits of an open mode as a "|"-separated list */ \
+ __print_flags(mode, "|", \
+ { ((__force unsigned long)FMODE_READ), "READ" }, /* __force cast: FMODE_* presumably carry a sparse-annotated type (fmode_t) - confirm */ \
+ { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+ { ((__force unsigned long)FMODE_EXEC), "EXEC" })
+
+#define show_nfs_fattr_flags(valid) /* render the NFS_ATTR_FATTR_* bits listed below as a "|"-separated list of names */ \
+ __print_flags((unsigned long)valid, "|", \
+ { NFS_ATTR_FATTR_TYPE, "TYPE" }, \
+ { NFS_ATTR_FATTR_MODE, "MODE" }, \
+ { NFS_ATTR_FATTR_NLINK, "NLINK" }, \
+ { NFS_ATTR_FATTR_OWNER, "OWNER" }, \
+ { NFS_ATTR_FATTR_GROUP, "GROUP" }, \
+ { NFS_ATTR_FATTR_RDEV, "RDEV" }, \
+ { NFS_ATTR_FATTR_SIZE, "SIZE" }, \
+ { NFS_ATTR_FATTR_FSID, "FSID" }, \
+ { NFS_ATTR_FATTR_FILEID, "FILEID" }, \
+ { NFS_ATTR_FATTR_ATIME, "ATIME" }, \
+ { NFS_ATTR_FATTR_MTIME, "MTIME" }, \
+ { NFS_ATTR_FATTR_CTIME, "CTIME" }, \
+ { NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \
+ { NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \
+ { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" })
+
+DECLARE_EVENT_CLASS(nfs4_clientid_event, /* event class: records an error code and the peer address string of clp->cl_rpcclient */
+ TP_PROTO(
+ const struct nfs_client *clp,
+ int error
+ ),
+
+ TP_ARGS(clp, error),
+
+ TP_STRUCT__entry(
+ __string(dstaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR))
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __assign_str(dstaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR)); /* same expression as in __string() above */
+ ),
+
+ TP_printk(
+ "error=%d (%s) dstaddr=%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error), /* symbolic name for the numeric error */
+ __get_str(dstaddr)
+ )
+);
+#define DEFINE_NFS4_CLIENTID_EVENT(name) /* define event "name" as an instance of the nfs4_clientid_event class, with the same (clp, error) prototype */ \
+ DEFINE_EVENT(nfs4_clientid_event, name, \
+ TP_PROTO( \
+ const struct nfs_client *clp, \
+ int error \
+ ), \
+ TP_ARGS(clp, error))
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid_confirm);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew_async);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_exchange_id);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_create_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_clientid);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
+
+TRACE_EVENT(nfs4_setup_sequence, /* records the session hash and the slot chosen in args->sa_slot */
+ TP_PROTO(
+ const struct nfs4_session *session,
+ const struct nfs4_sequence_args *args
+ ),
+ TP_ARGS(session, args),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, session)
+ __field(unsigned int, slot_nr)
+ __field(unsigned int, seq_nr)
+ __field(unsigned int, highest_used_slotid)
+ ),
+
+ TP_fast_assign(
+ const struct nfs4_slot *sa_slot = args->sa_slot; /* dereferenced unconditionally below */
+ __entry->session = nfs_session_id_hash(&session->sess_id); /* a hash of the id is stored, not the id itself */
+ __entry->slot_nr = sa_slot->slot_nr;
+ __entry->seq_nr = sa_slot->seq_nr;
+ __entry->highest_used_slotid =
+ sa_slot->table->highest_used_slotid; /* read from the slot's table */
+ ),
+ TP_printk(
+ "session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_used_slotid=%u",
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+ __entry->highest_used_slotid
+ )
+);
+
+#define show_nfs4_sequence_status_flags(status) /* render the SEQ4_STATUS_* bits listed below as a "|"-separated list of names */ \
+ __print_flags((unsigned long)status, "|", \
+ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \
+ "CB_GSS_CONTEXTS_EXPIRING" }, \
+ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \
+ "CB_GSS_CONTEXTS_EXPIRED" }, \
+ { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \
+ "EXPIRED_ALL_STATE_REVOKED" }, \
+ { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \
+ "EXPIRED_SOME_STATE_REVOKED" }, \
+ { SEQ4_STATUS_ADMIN_STATE_REVOKED, \
+ "ADMIN_STATE_REVOKED" }, \
+ { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \
+ "RECALLABLE_STATE_REVOKED" }, \
+ { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
+ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \
+ "RESTART_RECLAIM_NEEDED" }, \
+ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \
+ "CB_PATH_DOWN_SESSION" }, \
+ { SEQ4_STATUS_BACKCHANNEL_FAULT, \
+ "BACKCHANNEL_FAULT" })
+
+/*
+ * nfs4_sequence_done - trace the outcome of a SEQUENCE operation.
+ *
+ * Records a hash of the session id, the slot and sequence numbers taken
+ * from res->sr_slot, the slot-table limits and status flags carried in
+ * @res, and res->sr_status as the error.
+ */
+TRACE_EVENT(nfs4_sequence_done,
+		TP_PROTO(
+			const struct nfs4_session *session,
+			const struct nfs4_sequence_res *res
+		),
+		TP_ARGS(session, res),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, session)
+			__field(unsigned int, slot_nr)
+			__field(unsigned int, seq_nr)
+			__field(unsigned int, highest_slotid)
+			__field(unsigned int, target_highest_slotid)
+			__field(unsigned int, status_flags)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct nfs4_slot *sr_slot = res->sr_slot;
+			__entry->session = nfs_session_id_hash(&session->sess_id);
+			__entry->slot_nr = sr_slot->slot_nr;
+			__entry->seq_nr = sr_slot->seq_nr;
+			__entry->highest_slotid = res->sr_highest_slotid;
+			__entry->target_highest_slotid =
+					res->sr_target_highest_slotid;
+			/*
+			 * status_flags is declared above and printed below, so
+			 * it must be filled in here; otherwise the field is
+			 * never written and the printed flags are meaningless.
+			 */
+			__entry->status_flags = res->sr_status_flags;
+			__entry->error = res->sr_status;
+		),
+		TP_printk(
+			"error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+			"highest_slotid=%u target_highest_slotid=%u "
+			"status_flags=%u (%s)",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->session,
+			__entry->slot_nr,
+			__entry->seq_nr,
+			__entry->highest_slotid,
+			__entry->target_highest_slotid,
+			__entry->status_flags,
+			show_nfs4_sequence_status_flags(__entry->status_flags)
+		)
+);
+
+struct cb_sequenceargs;
+struct cb_sequenceres;
+
+TRACE_EVENT(nfs4_cb_sequence, /* records the fields of a CB_SEQUENCE request (args) together with the status being returned */
+ TP_PROTO(
+ const struct cb_sequenceargs *args,
+ const struct cb_sequenceres *res,
+ __be32 status
+ ),
+ TP_ARGS(args, res, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, session)
+ __field(unsigned int, slot_nr)
+ __field(unsigned int, seq_nr)
+ __field(unsigned int, highest_slotid)
+ __field(unsigned int, cachethis) /* recorded, but not included in TP_printk below */
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->session = nfs_session_id_hash(&args->csa_sessionid);
+ __entry->slot_nr = args->csa_slotid;
+ __entry->seq_nr = args->csa_sequenceid;
+ __entry->highest_slotid = args->csa_highestslotid;
+ __entry->cachethis = args->csa_cachethis;
+ __entry->error = -be32_to_cpu(status); /* status is big-endian; stored negated in host order; res is not read here */
+ ),
+
+ TP_printk(
+ "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_slotid=%u",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+ __entry->highest_slotid
+ )
+);
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
+ * nfs4_open_event - event class shared by the NFSv4 open tracepoints.
+ *
+ * Records the error, the open flags, the mode of the open context, the
+ * device of the dentry's superblock, the fileid of the parent directory
+ * and the dentry name. The fileid and filehandle hash of the opened
+ * inode are recorded only when ctx->state refers to a usable state;
+ * otherwise both are reported as zero.
+ */
+DECLARE_EVENT_CLASS(nfs4_open_event,
+		TP_PROTO(
+			const struct nfs_open_context *ctx,
+			int flags,
+			int error
+		),
+
+		TP_ARGS(ctx, flags, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, flags)
+			__field(unsigned int, fmode)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, ctx->dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			const struct nfs4_state *state = ctx->state;
+			const struct inode *inode = NULL;
+
+			__entry->error = error;
+			__entry->flags = flags;
+			__entry->fmode = (__force unsigned int)ctx->mode;
+			__entry->dev = ctx->dentry->d_sb->s_dev;
+			/*
+			 * ctx->state may be an ERR_PTR or still NULL when the
+			 * open did not produce a state; only dereference a
+			 * real pointer.
+			 */
+			if (!IS_ERR_OR_NULL(state))
+				inode = state->inode;
+			if (inode != NULL) {
+				__entry->fileid = NFS_FILEID(inode);
+				__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			} else {
+				__entry->fileid = 0;
+				__entry->fhandle = 0;
+			}
+			__entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode);
+			__assign_str(name, ctx->dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d (%s) flags=%d (%s) fmode=%s "
+			"fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"name=%02x:%02x:%llu/%s",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->flags,
+			show_open_flags(__entry->flags),
+			show_fmode_flags(__entry->fmode),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS4_OPEN_EVENT(name) \
+ DEFINE_EVENT(nfs4_open_event, name, \
+ TP_PROTO( \
+ const struct nfs_open_context *ctx, \
+ int flags, \
+ int error \
+ ), \
+ TP_ARGS(ctx, flags, error))
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim);
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired);
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_file);
+
+TRACE_EVENT(nfs4_close,
+ TP_PROTO(
+ const struct nfs4_state *state,
+ const struct nfs_closeargs *args,
+ const struct nfs_closeres *res,
+ int error
+ ),
+
+ TP_ARGS(state, args, res, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(unsigned int, fmode)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fmode = (__force unsigned int)state->state;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu "
+ "fhandle=0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ __entry->fmode ? show_fmode_flags(__entry->fmode) :
+ "closed",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+
+#define show_lock_cmd(type) \
+ __print_symbolic((int)type, \
+ { F_GETLK, "GETLK" }, \
+ { F_SETLK, "SETLK" }, \
+ { F_SETLKW, "SETLKW" })
+#define show_lock_type(type) \
+ __print_symbolic((int)type, \
+ { F_RDLCK, "RDLCK" }, \
+ { F_WRLCK, "WRLCK" }, \
+ { F_UNLCK, "UNLCK" })
+
+DECLARE_EVENT_CLASS(nfs4_lock_event,
+ TP_PROTO(
+ const struct file_lock *request,
+ const struct nfs4_state *state,
+ int cmd,
+ int error
+ ),
+
+ TP_ARGS(request, state, cmd, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(int, cmd)
+ __field(char, type)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->error = error;
+ __entry->cmd = cmd;
+ __entry->type = request->fl_type;
+ __entry->start = request->fl_start;
+ __entry->end = request->fl_end;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ ),
+
+ TP_printk(
+ "error=%d (%s) cmd=%s:%s range=%lld:%lld "
+ "fileid=%02x:%02x:%llu fhandle=0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ show_lock_cmd(__entry->cmd),
+ show_lock_type(__entry->type),
+ (long long)__entry->start,
+ (long long)__entry->end,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+
+#define DEFINE_NFS4_LOCK_EVENT(name) \
+ DEFINE_EVENT(nfs4_lock_event, name, \
+ TP_PROTO( \
+ const struct file_lock *request, \
+ const struct nfs4_state *state, \
+ int cmd, \
+ int error \
+ ), \
+ TP_ARGS(request, state, cmd, error))
+DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock);
+DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock);
+DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim);
+DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired);
+DEFINE_NFS4_LOCK_EVENT(nfs4_unlock);
+
+DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
+ TP_PROTO(
+ const struct inode *inode,
+ fmode_t fmode
+ ),
+
+ TP_ARGS(inode, fmode),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(unsigned int, fmode)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fmode = (__force unsigned int)fmode;
+ ),
+
+ TP_printk(
+ "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x",
+ show_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+#define DEFINE_NFS4_SET_DELEGATION_EVENT(name) \
+ DEFINE_EVENT(nfs4_set_delegation_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ fmode_t fmode \
+ ), \
+ TP_ARGS(inode, fmode))
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation);
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation);
+
+TRACE_EVENT(nfs4_delegreturn_exit,
+ TP_PROTO(
+ const struct nfs4_delegreturnargs *args,
+ const struct nfs4_delegreturnres *res,
+ int error
+ ),
+
+ TP_ARGS(args, res, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = res->server->s_dev;
+ __entry->fhandle = nfs_fhandle_hash(args->fhandle);
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) dev=%02x:%02x fhandle=0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fhandle
+ )
+);
+
+#ifdef CONFIG_NFS_V4_1
+DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
+ TP_PROTO(
+ const struct nfs4_state *state,
+ const struct nfs4_lock_state *lsp,
+ int error
+ ),
+
+ TP_ARGS(state, lsp, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = state->inode;
+
+ __entry->error = error;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+
+#define DEFINE_NFS4_TEST_STATEID_EVENT(name) \
+ DEFINE_EVENT(nfs4_test_stateid_event, name, \
+ TP_PROTO( \
+ const struct nfs4_state *state, \
+ const struct nfs4_lock_state *lsp, \
+ int error \
+ ), \
+ TP_ARGS(state, lsp, error))
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_delegation_stateid);
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_open_stateid);
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_lock_stateid);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_lookup_event,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct qstr *name,
+ int error
+ ),
+
+ TP_ARGS(dir, name, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, error)
+ __field(u64, dir)
+ __string(name, name->name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->error = error;
+ __assign_str(name, name->name);
+ ),
+
+ TP_printk(
+ "error=%d (%s) name=%02x:%02x:%llu/%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+#define DEFINE_NFS4_LOOKUP_EVENT(name) \
+ DEFINE_EVENT(nfs4_lookup_event, name, \
+ TP_PROTO( \
+ const struct inode *dir, \
+ const struct qstr *name, \
+ int error \
+ ), \
+ TP_ARGS(dir, name, error))
+
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_lookup);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_symlink);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_mkdir);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_mknod);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
+
+TRACE_EVENT(nfs4_rename,
+ TP_PROTO(
+ const struct inode *olddir,
+ const struct qstr *oldname,
+ const struct inode *newdir,
+ const struct qstr *newname,
+ int error
+ ),
+
+ TP_ARGS(olddir, oldname, newdir, newname, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, error)
+ __field(u64, olddir)
+ __string(oldname, oldname->name)
+ __field(u64, newdir)
+ __string(newname, newname->name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = olddir->i_sb->s_dev;
+ __entry->olddir = NFS_FILEID(olddir);
+ __entry->newdir = NFS_FILEID(newdir);
+ __entry->error = error;
+ __assign_str(oldname, oldname->name);
+ __assign_str(newname, newname->name);
+ ),
+
+ TP_printk(
+ "error=%d (%s) oldname=%02x:%02x:%llu/%s "
+ "newname=%02x:%02x:%llu/%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->olddir,
+ __get_str(oldname),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->newdir,
+ __get_str(newname)
+ )
+);
+
+DECLARE_EVENT_CLASS(nfs4_inode_event,
+ TP_PROTO(
+ const struct inode *inode,
+ int error
+ ),
+
+ TP_ARGS(inode, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+ )
+);
+
+#define DEFINE_NFS4_INODE_EVENT(name) \
+ DEFINE_EVENT(nfs4_inode_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ int error \
+ ), \
+ TP_ARGS(inode, error))
+
+DEFINE_NFS4_INODE_EVENT(nfs4_setattr);
+DEFINE_NFS4_INODE_EVENT(nfs4_access);
+DEFINE_NFS4_INODE_EVENT(nfs4_readlink);
+DEFINE_NFS4_INODE_EVENT(nfs4_readdir);
+DEFINE_NFS4_INODE_EVENT(nfs4_get_acl);
+DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
+DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation);
+DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn);
+
+DECLARE_EVENT_CLASS(nfs4_getattr_event,
+ TP_PROTO(
+ const struct nfs_server *server,
+ const struct nfs_fh *fhandle,
+ const struct nfs_fattr *fattr,
+ int error
+ ),
+
+ TP_ARGS(server, fhandle, fattr, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(unsigned int, valid)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = server->s_dev;
+ __entry->valid = fattr->valid;
+ __entry->fhandle = nfs_fhandle_hash(fhandle);
+ __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "valid=%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ show_nfs_fattr_flags(__entry->valid)
+ )
+);
+
+#define DEFINE_NFS4_GETATTR_EVENT(name) \
+ DEFINE_EVENT(nfs4_getattr_event, name, \
+ TP_PROTO( \
+ const struct nfs_server *server, \
+ const struct nfs_fh *fhandle, \
+ const struct nfs_fattr *fattr, \
+ int error \
+ ), \
+ TP_ARGS(server, fhandle, fattr, error))
+DEFINE_NFS4_GETATTR_EVENT(nfs4_getattr);
+DEFINE_NFS4_GETATTR_EVENT(nfs4_lookup_root);
+DEFINE_NFS4_GETATTR_EVENT(nfs4_fsinfo);
+
+DECLARE_EVENT_CLASS(nfs4_idmap_event,
+ TP_PROTO(
+ const char *name,
+ int len,
+ u32 id,
+ int error
+ ),
+
+ TP_ARGS(name, len, id, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(u32, id)
+ __dynamic_array(char, name, len > 0 ? len + 1 : 1)
+ ),
+
+ TP_fast_assign(
+ if (len < 0)
+ len = 0;
+ __entry->error = error < 0 ? error : 0;
+ __entry->id = id;
+ memcpy(__get_dynamic_array(name), name, len);
+ ((char *)__get_dynamic_array(name))[len] = 0;
+ ),
+
+ TP_printk(
+ "error=%d id=%u name=%s",
+ __entry->error,
+ __entry->id,
+ __get_str(name)
+ )
+);
+#define DEFINE_NFS4_IDMAP_EVENT(name) \
+ DEFINE_EVENT(nfs4_idmap_event, name, \
+ TP_PROTO( \
+ const char *name, \
+ int len, \
+ u32 id, \
+ int error \
+ ), \
+ TP_ARGS(name, len, id, error))
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_name_to_uid);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
+
+DECLARE_EVENT_CLASS(nfs4_read_event,
+ TP_PROTO(
+ const struct nfs_pgio_data *data,
+ int error
+ ),
+
+ TP_ARGS(data, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->header->inode;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->offset = data->args.offset;
+ __entry->count = data->args.count;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%zu",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset,
+ __entry->count
+ )
+);
+#define DEFINE_NFS4_READ_EVENT(name) \
+ DEFINE_EVENT(nfs4_read_event, name, \
+ TP_PROTO( \
+ const struct nfs_pgio_data *data, \
+ int error \
+ ), \
+ TP_ARGS(data, error))
+DEFINE_NFS4_READ_EVENT(nfs4_read);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_write_event,
+ TP_PROTO(
+ const struct nfs_pgio_data *data,
+ int error
+ ),
+
+ TP_ARGS(data, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->header->inode;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->offset = data->args.offset;
+ __entry->count = data->args.count;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%zu",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset,
+ __entry->count
+ )
+);
+
+#define DEFINE_NFS4_WRITE_EVENT(name) \
+ DEFINE_EVENT(nfs4_write_event, name, \
+ TP_PROTO( \
+ const struct nfs_pgio_data *data, \
+ int error \
+ ), \
+ TP_ARGS(data, error))
+DEFINE_NFS4_WRITE_EVENT(nfs4_write);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_commit_event,
+ TP_PROTO(
+ const struct nfs_commit_data *data,
+ int error
+ ),
+
+ TP_ARGS(data, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->inode;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->offset = data->args.offset;
+ __entry->count = data->args.count;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%zu",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset,
+ __entry->count
+ )
+);
+#define DEFINE_NFS4_COMMIT_EVENT(name) \
+ DEFINE_EVENT(nfs4_commit_event, name, \
+ TP_PROTO( \
+ const struct nfs_commit_data *data, \
+ int error \
+ ), \
+ TP_ARGS(data, error))
+DEFINE_NFS4_COMMIT_EVENT(nfs4_commit);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds);
+
+#define show_pnfs_iomode(iomode) \
+ __print_symbolic(iomode, \
+ { IOMODE_READ, "READ" }, \
+ { IOMODE_RW, "RW" }, \
+ { IOMODE_ANY, "ANY" })
+
+TRACE_EVENT(nfs4_layoutget,
+ TP_PROTO(
+ const struct nfs_open_context *ctx,
+ const struct pnfs_layout_range *args,
+ const struct pnfs_layout_range *res,
+ int error
+ ),
+
+ TP_ARGS(ctx, args, res, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u32, iomode)
+ __field(u64, offset)
+ __field(u64, count)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = ctx->dentry->d_inode;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->iomode = args->iomode;
+ __entry->offset = args->offset;
+ __entry->count = args->length;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "iomode=%s offset=%llu count=%llu",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ show_pnfs_iomode(__entry->iomode),
+ (unsigned long long)__entry->offset,
+ (unsigned long long)__entry->count
+ )
+);
+
+DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit);
+DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn);
+
+#endif /* CONFIG_NFS_V4_1 */
+
+#endif /* _TRACE_NFS4_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE nfs4trace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 26b14392043..939ae606cfa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -102,12 +102,19 @@ static int nfs4_stat_to_errno(int);
#define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2))
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+/* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */
+#define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN))
+#else
+#define nfs4_label_maxsz 0
+#endif
/* We support only one layout type per file system */
#define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
/* This is based on getfattr, which uses the most attributes: */
#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
3 + 3 + 3 + nfs4_owner_maxsz + \
- nfs4_group_maxsz + decode_mdsthreshold_maxsz))
+ nfs4_group_maxsz + nfs4_label_maxsz + \
+ decode_mdsthreshold_maxsz))
#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
nfs4_fattr_value_maxsz)
#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -115,6 +122,7 @@ static int nfs4_stat_to_errno(int);
1 + 2 + 1 + \
nfs4_owner_maxsz + \
nfs4_group_maxsz + \
+ nfs4_label_maxsz + \
4 + 4)
#define encode_savefh_maxsz (op_encode_hdr_maxsz)
#define decode_savefh_maxsz (op_decode_hdr_maxsz)
@@ -192,7 +200,8 @@ static int nfs4_stat_to_errno(int);
encode_stateid_maxsz + 3)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
#define encode_readdir_maxsz (op_encode_hdr_maxsz + \
- 2 + encode_verifier_maxsz + 5)
+ 2 + encode_verifier_maxsz + 5 + \
+ nfs4_label_maxsz)
#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
decode_verifier_maxsz)
#define encode_readlink_maxsz (op_encode_hdr_maxsz)
@@ -280,7 +289,9 @@ static int nfs4_stat_to_errno(int);
XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
1 /* flags */ + \
1 /* spa_how */ + \
- 0 /* SP4_NONE (for now) */ + \
+ /* max is SP4_MACH_CRED (for now) */ + \
+ 1 + NFS4_OP_MAP_NUM_WORDS + \
+ 1 + NFS4_OP_MAP_NUM_WORDS + \
1 /* implementation id array of size 1 */ + \
1 /* nii_domain */ + \
XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
@@ -292,7 +303,9 @@ static int nfs4_stat_to_errno(int);
1 /* eir_sequenceid */ + \
1 /* eir_flags */ + \
1 /* spr_how */ + \
- 0 /* SP4_NONE (for now) */ + \
+ /* max is SP4_MACH_CRED (for now) */ + \
+ 1 + NFS4_OP_MAP_NUM_WORDS + \
+ 1 + NFS4_OP_MAP_NUM_WORDS + \
2 /* eir_server_owner.so_minor_id */ + \
/* eir_server_owner.so_major_id<> */ \
XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
@@ -396,7 +409,7 @@ static int nfs4_stat_to_errno(int);
#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
-#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
+#define decode_free_stateid_maxsz (op_decode_hdr_maxsz)
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@@ -530,14 +543,10 @@ static int nfs4_stat_to_errno(int);
decode_setclientid_maxsz)
#define NFS4_enc_setclientid_confirm_sz \
(compound_encode_hdr_maxsz + \
- encode_setclientid_confirm_maxsz + \
- encode_putrootfh_maxsz + \
- encode_fsinfo_maxsz)
+ encode_setclientid_confirm_maxsz)
#define NFS4_dec_setclientid_confirm_sz \
(compound_decode_hdr_maxsz + \
- decode_setclientid_confirm_maxsz + \
- decode_putrootfh_maxsz + \
- decode_fsinfo_maxsz)
+ decode_setclientid_confirm_maxsz)
#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -581,11 +590,13 @@ static int nfs4_stat_to_errno(int);
#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
- encode_getattr_maxsz)
+ encode_getattr_maxsz + \
+ encode_renew_maxsz)
#define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
- decode_getattr_maxsz)
+ decode_getattr_maxsz + \
+ decode_renew_maxsz)
#define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -722,13 +733,15 @@ static int nfs4_stat_to_errno(int);
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_lookup_maxsz + \
- encode_fs_locations_maxsz)
+ encode_fs_locations_maxsz + \
+ encode_renew_maxsz)
#define NFS4_dec_fs_locations_sz \
(compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_lookup_maxsz + \
- decode_fs_locations_maxsz)
+ decode_fs_locations_maxsz + \
+ decode_renew_maxsz)
#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -737,6 +750,18 @@ static int nfs4_stat_to_errno(int);
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_secinfo_maxsz)
+#define NFS4_enc_fsid_present_sz \
+ (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_getfh_maxsz + \
+ encode_renew_maxsz)
+#define NFS4_dec_fsid_present_sz \
+ (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_getfh_maxsz + \
+ decode_renew_maxsz)
#if defined(CONFIG_NFS_V4_1)
#define NFS4_enc_bind_conn_to_session_sz \
(compound_encode_hdr_maxsz + \
@@ -857,6 +882,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
decode_sequence_maxsz +
decode_putfh_maxsz) *
XDR_UNIT);
+
+const u32 nfs41_maxgetdevinfo_overhead = ((RPC_MAX_REPHEADER_WITH_AUTH +
+ compound_decode_hdr_maxsz +
+ decode_sequence_maxsz) *
+ XDR_UNIT);
+EXPORT_SYMBOL_GPL(nfs41_maxgetdevinfo_overhead);
#endif /* CONFIG_NFS_V4_1 */
static const umode_t nfs_type2fmt[] = {
@@ -972,124 +1003,123 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
}
-static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
+static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
+ const struct nfs4_label *label,
+ const struct nfs_server *server)
{
char owner_name[IDMAP_NAMESZ];
char owner_group[IDMAP_NAMESZ];
int owner_namelen = 0;
int owner_grouplen = 0;
__be32 *p;
- __be32 *q;
- int len;
- uint32_t bmval0 = 0;
- uint32_t bmval1 = 0;
+ unsigned i;
+ uint32_t len = 0;
+ uint32_t bmval_len;
+ uint32_t bmval[3] = { 0 };
/*
* We reserve enough space to write the entire attribute buffer at once.
* In the worst-case, this would be
- * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
- * = 36 bytes, plus any contribution from variable-length fields
+ * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
+ * = 40 bytes, plus any contribution from variable-length fields
* such as owner/group.
*/
- len = 16;
-
- /* Sigh */
- if (iap->ia_valid & ATTR_SIZE)
+ if (iap->ia_valid & ATTR_SIZE) {
+ bmval[0] |= FATTR4_WORD0_SIZE;
len += 8;
- if (iap->ia_valid & ATTR_MODE)
+ }
+ if (iap->ia_valid & ATTR_MODE) {
+ bmval[1] |= FATTR4_WORD1_MODE;
len += 4;
+ }
if (iap->ia_valid & ATTR_UID) {
owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
if (owner_namelen < 0) {
dprintk("nfs: couldn't resolve uid %d to string\n",
- iap->ia_uid);
+ from_kuid(&init_user_ns, iap->ia_uid));
/* XXX */
strcpy(owner_name, "nobody");
owner_namelen = sizeof("nobody") - 1;
/* goto out; */
}
+ bmval[1] |= FATTR4_WORD1_OWNER;
len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
}
if (iap->ia_valid & ATTR_GID) {
owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
if (owner_grouplen < 0) {
dprintk("nfs: couldn't resolve gid %d to string\n",
- iap->ia_gid);
+ from_kgid(&init_user_ns, iap->ia_gid));
strcpy(owner_group, "nobody");
owner_grouplen = sizeof("nobody") - 1;
/* goto out; */
}
+ bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
}
- if (iap->ia_valid & ATTR_ATIME_SET)
+ if (iap->ia_valid & ATTR_ATIME_SET) {
+ bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
len += 16;
- else if (iap->ia_valid & ATTR_ATIME)
+ } else if (iap->ia_valid & ATTR_ATIME) {
+ bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
len += 4;
- if (iap->ia_valid & ATTR_MTIME_SET)
+ }
+ if (iap->ia_valid & ATTR_MTIME_SET) {
+ bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
len += 16;
- else if (iap->ia_valid & ATTR_MTIME)
+ } else if (iap->ia_valid & ATTR_MTIME) {
+ bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
len += 4;
- p = reserve_space(xdr, len);
+ }
+ if (label) {
+ len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
+ bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
+ }
- /*
- * We write the bitmap length now, but leave the bitmap and the attribute
- * buffer length to be backfilled at the end of this routine.
- */
- *p++ = cpu_to_be32(2);
- q = p;
- p += 3;
+ if (bmval[2] != 0)
+ bmval_len = 3;
+ else if (bmval[1] != 0)
+ bmval_len = 2;
+ else
+ bmval_len = 1;
- if (iap->ia_valid & ATTR_SIZE) {
- bmval0 |= FATTR4_WORD0_SIZE;
+ p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len);
+
+ *p++ = cpu_to_be32(bmval_len);
+ for (i = 0; i < bmval_len; i++)
+ *p++ = cpu_to_be32(bmval[i]);
+ *p++ = cpu_to_be32(len);
+
+ if (bmval[0] & FATTR4_WORD0_SIZE)
p = xdr_encode_hyper(p, iap->ia_size);
- }
- if (iap->ia_valid & ATTR_MODE) {
- bmval1 |= FATTR4_WORD1_MODE;
+ if (bmval[1] & FATTR4_WORD1_MODE)
*p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
- }
- if (iap->ia_valid & ATTR_UID) {
- bmval1 |= FATTR4_WORD1_OWNER;
+ if (bmval[1] & FATTR4_WORD1_OWNER)
p = xdr_encode_opaque(p, owner_name, owner_namelen);
- }
- if (iap->ia_valid & ATTR_GID) {
- bmval1 |= FATTR4_WORD1_OWNER_GROUP;
+ if (bmval[1] & FATTR4_WORD1_OWNER_GROUP)
p = xdr_encode_opaque(p, owner_group, owner_grouplen);
+ if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+ if (iap->ia_valid & ATTR_ATIME_SET) {
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
+ } else
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
- if (iap->ia_valid & ATTR_ATIME_SET) {
- bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
- }
- else if (iap->ia_valid & ATTR_ATIME) {
- bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
- *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
- }
- if (iap->ia_valid & ATTR_MTIME_SET) {
- bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
- }
- else if (iap->ia_valid & ATTR_MTIME) {
- bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
- *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+ if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+ if (iap->ia_valid & ATTR_MTIME_SET) {
+ *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+ p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+ } else
+ *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
}
-
- /*
- * Now we backfill the bitmap and the attribute buffer length.
- */
- if (len != ((char *)p - (char *)q) + 4) {
- printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
- len, ((char *)p - (char *)q) + 4);
- BUG();
+ if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
+ *p++ = cpu_to_be32(label->lfs);
+ *p++ = cpu_to_be32(label->pi);
+ *p++ = cpu_to_be32(label->len);
+ p = xdr_encode_opaque_fixed(p, label->label, label->len);
}
- len = (char *)p - (char *)q - 12;
- *q++ = htonl(bmval0);
- *q++ = htonl(bmval1);
- *q = htonl(len);
/* out: */
}
@@ -1142,7 +1172,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
}
encode_string(xdr, create->name->len, create->name->name);
- encode_attrs(xdr, create->attrs, create->server);
+ encode_attrs(xdr, create->attrs, create->label, create->server);
}
static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1194,8 +1224,10 @@ encode_getattr_three(struct xdr_stream *xdr,
static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
{
- encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
- bitmask[1] & nfs4_fattr_bitmap[1], hdr);
+ encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+ bitmask[1] & nfs4_fattr_bitmap[1],
+ bitmask[2] & nfs4_fattr_bitmap[2],
+ hdr);
}
static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
@@ -1366,33 +1398,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
{
+ struct iattr dummy;
__be32 *p;
- struct nfs_client *clp;
p = reserve_space(xdr, 4);
- switch(arg->open_flags & O_EXCL) {
- case 0:
+ switch(arg->createmode) {
+ case NFS4_CREATE_UNCHECKED:
*p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
- encode_attrs(xdr, arg->u.attrs, arg->server);
+ encode_attrs(xdr, arg->u.attrs, arg->label, arg->server);
break;
- default:
- clp = arg->server->nfs_client;
- if (clp->cl_mvops->minor_version > 0) {
- if (nfs4_has_persistent_session(clp)) {
- *p = cpu_to_be32(NFS4_CREATE_GUARDED);
- encode_attrs(xdr, arg->u.attrs, arg->server);
- } else {
- struct iattr dummy;
-
- *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
- encode_nfs4_verifier(xdr, &arg->u.verifier);
- dummy.ia_valid = 0;
- encode_attrs(xdr, &dummy, arg->server);
- }
- } else {
- *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
- encode_nfs4_verifier(xdr, &arg->u.verifier);
- }
+ case NFS4_CREATE_GUARDED:
+ *p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ encode_attrs(xdr, arg->u.attrs, arg->label, arg->server);
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
+ encode_nfs4_verifier(xdr, &arg->u.verifier);
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
+ encode_nfs4_verifier(xdr, &arg->u.verifier);
+ dummy.ia_valid = 0;
+ encode_attrs(xdr, &dummy, arg->label, arg->server);
}
}
@@ -1459,6 +1486,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
encode_string(xdr, name->len, name->name);
}
+/*
+ * Encode an open claim of type NFS4_OPEN_CLAIM_FH: only the 4-byte
+ * claim type is written, nothing follows it.
+ */
+static inline void encode_claim_fh(struct xdr_stream *xdr)
+{
+	__be32 *claim = reserve_space(xdr, 4);
+
+	*claim = cpu_to_be32(NFS4_OPEN_CLAIM_FH);
+}
+
+/*
+ * Encode an open claim of type NFS4_OPEN_CLAIM_DELEG_CUR_FH: the
+ * 4-byte claim type followed by the stateid passed in.
+ */
+static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid)
+{
+	__be32 *claim = reserve_space(xdr, 4);
+
+	*claim = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH);
+	encode_nfs4_stateid(xdr, stateid);
+}
+
static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr);
@@ -1474,6 +1518,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg,
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
break;
+ case NFS4_OPEN_CLAIM_FH:
+ encode_claim_fh(xdr);
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ encode_claim_delegate_cur_fh(xdr, &arg->u.delegation);
+ break;
default:
BUG();
}
@@ -1506,35 +1556,13 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
}
-static void encode_open_stateid(struct xdr_stream *xdr,
- const struct nfs_open_context *ctx,
- const struct nfs_lock_context *l_ctx,
- fmode_t fmode,
- int zero_seqid)
-{
- nfs4_stateid stateid;
-
- if (ctx->state != NULL) {
- const struct nfs_lockowner *lockowner = NULL;
-
- if (l_ctx != NULL)
- lockowner = &l_ctx->lockowner;
- nfs4_select_rw_stateid(&stateid, ctx->state,
- fmode, lockowner);
- if (zero_seqid)
- stateid.seqid = 0;
- encode_nfs4_stateid(xdr, &stateid);
- } else
- encode_nfs4_stateid(xdr, &zero_stateid);
-}
-
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr);
- encode_open_stateid(xdr, args->context, args->lock_context,
- FMODE_READ, hdr->minorversion);
+ encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
@@ -1543,12 +1571,14 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
{
- uint32_t attrs[2] = {
+ uint32_t attrs[3] = {
FATTR4_WORD0_RDATTR_ERROR,
FATTR4_WORD1_MOUNTED_ON_FILEID,
};
uint32_t dircount = readdir->count >> 1;
__be32 *p, verf[2];
+ uint32_t attrlen = 0;
+ unsigned int i;
if (readdir->plus) {
attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
@@ -1557,29 +1587,36 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+ attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
dircount >>= 1;
}
/* Use mounted_on_fileid only if the server supports it */
if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
attrs[0] |= FATTR4_WORD0_FILEID;
+ for (i = 0; i < ARRAY_SIZE(attrs); i++) {
+ attrs[i] &= readdir->bitmask[i];
+ if (attrs[i] != 0)
+ attrlen = i+1;
+ }
encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr);
encode_uint64(xdr, readdir->cookie);
encode_nfs4_verifier(xdr, &readdir->verifier);
- p = reserve_space(xdr, 20);
+ p = reserve_space(xdr, 12 + (attrlen << 2));
*p++ = cpu_to_be32(dircount);
*p++ = cpu_to_be32(readdir->count);
- *p++ = cpu_to_be32(2);
-
- *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
- *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
+ *p++ = cpu_to_be32(attrlen);
+ for (i = 0; i < attrlen; i++)
+ *p++ = cpu_to_be32(attrs[i]);
memcpy(verf, readdir->verifier.data, sizeof(verf));
- dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
+
+ dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n",
__func__,
(unsigned long long)readdir->cookie,
verf[0], verf[1],
attrs[0] & readdir->bitmask[0],
- attrs[1] & readdir->bitmask[1]);
+ attrs[1] & readdir->bitmask[1],
+ attrs[2] & readdir->bitmask[2]);
}
static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
@@ -1638,7 +1675,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
{
encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
encode_nfs4_stateid(xdr, &arg->stateid);
- encode_attrs(xdr, arg->iap, server);
+ encode_attrs(xdr, arg->iap, arg->label, server);
}
static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
@@ -1665,13 +1702,13 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
encode_nfs4_verifier(xdr, &arg->confirm);
}
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr);
- encode_open_stateid(xdr, args->context, args->lock_context,
- FMODE_WRITE, hdr->minorversion);
+ encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
@@ -1709,6 +1746,14 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr,
*p = 0; /* use_conn_in_rdma_mode = False */
}
+/*
+ * Encode an operation map: the word count NFS4_OP_MAP_NUM_WORDS
+ * followed by each entry of op_map->u.words in index order.
+ */
+static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+{
+	unsigned int word = 0;
+
+	encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS);
+	while (word < NFS4_OP_MAP_NUM_WORDS) {
+		encode_uint32(xdr, op_map->u.words[word]);
+		word++;
+	}
+}
+
static void encode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_args *args,
struct compound_hdr *hdr)
@@ -1722,9 +1767,20 @@ static void encode_exchange_id(struct xdr_stream *xdr,
encode_string(xdr, args->id_len, args->id);
- p = reserve_space(xdr, 12);
- *p++ = cpu_to_be32(args->flags);
- *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
+ encode_uint32(xdr, args->flags);
+ encode_uint32(xdr, args->state_protect.how);
+
+ switch (args->state_protect.how) {
+ case SP4_NONE:
+ break;
+ case SP4_MACH_CRED:
+ encode_op_map(xdr, &args->state_protect.enforce);
+ encode_op_map(xdr, &args->state_protect.allow);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
if (send_implementation_id &&
sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
@@ -1735,7 +1791,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
utsname()->version, utsname()->machine);
if (len > 0) {
- *p = cpu_to_be32(1); /* implementation id array length=1 */
+ encode_uint32(xdr, 1); /* implementation id array length=1 */
encode_string(xdr,
sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
@@ -1746,7 +1802,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, 0);
*p = cpu_to_be32(0);
} else
- *p = cpu_to_be32(0); /* implementation id array length=0 */
+ encode_uint32(xdr, 0); /* implementation id array length=0 */
}
static void encode_create_session(struct xdr_stream *xdr,
@@ -1799,7 +1855,7 @@ static void encode_create_session(struct xdr_stream *xdr,
*p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
/* authsys_parms rfc1831 */
- *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */
+ *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */
p = xdr_encode_opaque(p, machine_name, len);
*p++ = cpu_to_be32(0); /* UID */
*p++ = cpu_to_be32(0); /* GID */
@@ -1841,11 +1897,10 @@ static void encode_sequence(struct xdr_stream *xdr,
struct nfs4_slot *slot = args->sa_slot;
__be32 *p;
- if (slot == NULL)
- return;
-
tp = slot->table;
session = tp->session;
+ if (!session)
+ return;
encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
@@ -1901,7 +1956,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
NFS4_DEVICEID4_SIZE);
*p++ = cpu_to_be32(args->pdev->layout_type);
- *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
+ *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */
*p++ = cpu_to_be32(0); /* bitmap length 0 */
}
@@ -2015,7 +2070,7 @@ static void encode_free_stateid(struct xdr_stream *xdr,
struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
}
#endif /* CONFIG_NFS_V4_1 */
@@ -2026,9 +2081,9 @@ static void encode_free_stateid(struct xdr_stream *xdr,
static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
{
#if defined(CONFIG_NFS_V4_1)
-
- if (args->sa_slot)
- return args->sa_slot->table->session->clp->cl_mvops->minor_version;
+ struct nfs4_session *session = args->sa_slot->table->session;
+ if (session)
+ return session->clp->cl_mvops->minor_version;
#endif /* CONFIG_NFS_V4_1 */
return 0;
}
@@ -2398,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a READ request
*/
static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2460,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a WRITE request
*/
static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2609,12 +2664,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
struct compound_hdr hdr = {
.nops = 0,
};
- const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME };
encode_compound_hdr(xdr, req, &hdr);
encode_setclientid_confirm(xdr, arg, &hdr);
- encode_putrootfh(xdr, &hdr);
- encode_fsinfo(xdr, lease_bitmap, &hdr);
encode_nops(&hdr);
}
@@ -2651,11 +2703,20 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
- encode_putfh(xdr, args->dir_fh, &hdr);
- encode_lookup(xdr, args->name, &hdr);
- replen = hdr.replen; /* get the attribute into args->page */
- encode_fs_locations(xdr, args->bitmask, &hdr);
+ if (args->migration) {
+ encode_putfh(xdr, args->fh, &hdr);
+ replen = hdr.replen;
+ encode_fs_locations(xdr, args->bitmask, &hdr);
+ if (args->renew)
+ encode_renew(xdr, args->clientid, &hdr);
+ } else {
+ encode_putfh(xdr, args->dir_fh, &hdr);
+ encode_lookup(xdr, args->name, &hdr);
+ replen = hdr.replen;
+ encode_fs_locations(xdr, args->bitmask, &hdr);
+ }
+ /* Set up reply kvec to capture returned fs_locations array. */
xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
0, PAGE_SIZE);
encode_nops(&hdr);
@@ -2679,6 +2740,26 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
encode_nops(&hdr);
}
+/*
+ * Encode FSID_PRESENT request
+ *
+ * After the compound header this calls, in order, encode_sequence(),
+ * encode_putfh() for args->fh and encode_getfh(); encode_renew() for
+ * args->clientid is appended only when args->renew is set.
+ */
+static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_fsid_present_arg *args)
+{
+	struct compound_hdr compound = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &compound);
+	encode_sequence(xdr, &args->seq_args, &compound);
+	encode_putfh(xdr, args->fh, &compound);
+	encode_getfh(xdr, &compound);
+	if (args->renew)
+		encode_renew(xdr, args->clientid, &compound);
+	encode_nops(&compound);
+}
+
#if defined(CONFIG_NFS_V4_1)
/*
* BIND_CONN_TO_SESSION request
@@ -3017,7 +3098,8 @@ out_overflow:
return -EIO;
}
-static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
@@ -3027,19 +3109,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
- if (opnum != expected) {
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
- return -EIO;
- }
+ if (unlikely(opnum != expected))
+ goto out_bad_operation;
nfserr = be32_to_cpup(p);
- if (nfserr != NFS_OK)
- return nfs4_stat_to_errno(nfserr);
- return 0;
+ if (nfserr == NFS_OK)
+ *nfs_retval = 0;
+ else
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
+ return true;
+out_bad_operation:
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
+ opnum, expected);
+ *nfs_retval = -EREMOTEIO;
+ return false;
out_overflow:
print_overflow_msg(__func__, xdr);
- return -EIO;
+ *nfs_retval = -EIO;
+ return false;
+}
+
+/*
+ * Decode an operation header and return the status that
+ * __decode_op_hdr() stored; its boolean result is not used here.
+ */
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	int status;
+
+	__decode_op_hdr(xdr, expected, &status);
+	return status;
+}
/* Dummy routine */
@@ -3355,7 +3450,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
{
__be32 *p;
- *res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL;
+ *res = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
@@ -3497,8 +3592,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
if (n == 0)
goto root_path;
dprintk("pathname4: ");
- path->ncomponents = 0;
- while (path->ncomponents < n) {
+ if (n > NFS4_PATHNAME_MAXCOMPONENTS) {
+ dprintk("cannot parse %d components in path\n", n);
+ goto out_eio;
+ }
+ for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) {
struct nfs4_string *component = &path->components[path->ncomponents];
status = decode_opaque_inline(xdr, &component->len, &component->data);
if (unlikely(status != 0))
@@ -3507,12 +3605,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
pr_cont("%s%.*s ",
(path->ncomponents != n ? "/ " : ""),
component->len, component->data);
- if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS)
- path->ncomponents++;
- else {
- dprintk("cannot parse %d components in path\n", n);
- goto out_eio;
- }
}
out:
return status;
@@ -3557,27 +3649,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
n = be32_to_cpup(p);
if (n <= 0)
goto out_eio;
- res->nlocations = 0;
- while (res->nlocations < n) {
+ for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
- struct nfs4_fs_location *loc = &res->locations[res->nlocations];
+ struct nfs4_fs_location *loc;
+ if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES)
+ break;
+ loc = &res->locations[res->nlocations];
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
m = be32_to_cpup(p);
- loc->nservers = 0;
dprintk("%s: servers:\n", __func__);
- while (loc->nservers < m) {
- struct nfs4_string *server = &loc->servers[loc->nservers];
- status = decode_opaque_inline(xdr, &server->len, &server->data);
- if (unlikely(status != 0))
- goto out_eio;
- dprintk("%s ", server->data);
- if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
- loc->nservers++;
- else {
+ for (loc->nservers = 0; loc->nservers < m; loc->nservers++) {
+ struct nfs4_string *server;
+
+ if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) {
unsigned int i;
dprintk("%s: using first %u of %u servers "
"returned for location %u\n",
@@ -3591,13 +3679,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (unlikely(status != 0))
goto out_eio;
}
+ break;
}
+ server = &loc->servers[loc->nservers];
+ status = decode_opaque_inline(xdr, &server->len, &server->data);
+ if (unlikely(status != 0))
+ goto out_eio;
+ dprintk("%s ", server->data);
}
status = decode_pathname(xdr, &loc->rootpath);
if (unlikely(status != 0))
goto out_eio;
- if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
- res->nlocations++;
}
if (res->nlocations != 0)
status = NFS_ATTR_FATTR_V4_LOCATIONS;
@@ -3778,14 +3870,14 @@ out_overflow:
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
- const struct nfs_server *server, uint32_t *uid,
+ const struct nfs_server *server, kuid_t *uid,
struct nfs4_string *owner_name)
{
uint32_t len;
__be32 *p;
int ret = 0;
- *uid = -2;
+ *uid = make_kuid(&init_user_ns, -2);
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
@@ -3813,7 +3905,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
__func__, len);
bitmap[1] &= ~FATTR4_WORD1_OWNER;
}
- dprintk("%s: uid=%d\n", __func__, (int)*uid);
+ dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
return ret;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -3821,14 +3913,14 @@ out_overflow:
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
- const struct nfs_server *server, uint32_t *gid,
+ const struct nfs_server *server, kgid_t *gid,
struct nfs4_string *group_name)
{
uint32_t len;
__be32 *p;
int ret = 0;
- *gid = -2;
+ *gid = make_kgid(&init_user_ns, -2);
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
@@ -3856,7 +3948,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
__func__, len);
bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
}
- dprintk("%s: gid=%d\n", __func__, (int)*gid);
+ dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
return ret;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -4056,6 +4148,56 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
return status;
}
+/*
+ * Decode the FATTR4_WORD2_SECURITY_LABEL attribute.
+ *
+ * @xdr:    stream positioned at the attribute data
+ * @bitmap: attribute bitmap; the label bit in word 2 is cleared once a
+ *          label shorter than NFS4_MAXLABELLEN has been consumed
+ * @label:  destination for the label, or NULL to parse and discard it
+ *
+ * The wire data is read as three 32-bit words (lfs, pi, length)
+ * followed by the label bytes.
+ *
+ * Returns NFS_ATTR_FATTR_V4_SECURITY_LABEL when a label was copied
+ * into @label, 0 when nothing was stored (bit absent, @label NULL or
+ * label too long), and -EIO when a lower bit of word 2 is still set or
+ * the stream is too short.
+ */
+static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
+					struct nfs4_label *label)
+{
+	uint32_t pi = 0;
+	uint32_t lfs = 0;
+	__u32 len;
+	__be32 *p;
+	int status = 0;
+
+	/* Bits below the label bit must already have been cleared. */
+	if (unlikely(bitmap[2] & (FATTR4_WORD2_SECURITY_LABEL - 1U)))
+		return -EIO;
+	if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		lfs = be32_to_cpup(p);
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		pi = be32_to_cpup(p);
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		len = be32_to_cpup(p);
+		p = xdr_inline_decode(xdr, len);
+		if (unlikely(!p))
+			goto out_overflow;
+		if (len < NFS4_MAXLABELLEN) {
+			if (label) {
+				memcpy(label->label, p, len);
+				label->len = len;
+				label->pi = pi;
+				label->lfs = lfs;
+				status = NFS_ATTR_FATTR_V4_SECURITY_LABEL;
+			}
+			bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+		} else
+			printk(KERN_WARNING "%s: label too long (%u)!\n",
+					__func__, len);
+	}
+	/*
+	 * The memcpy() above writes no terminating NUL, so bound the
+	 * debug print by the recorded length instead of using plain %s.
+	 */
+	if (label && label->label)
+		dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", __func__,
+			(int)label->len, (char *)label->label,
+			label->len, label->pi, label->lfs);
+	return status;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
{
int status = 0;
@@ -4398,7 +4540,7 @@ out_overflow:
static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_fattr *fattr, struct nfs_fh *fh,
- struct nfs4_fs_locations *fs_loc,
+ struct nfs4_fs_locations *fs_loc, struct nfs4_label *label,
const struct nfs_server *server)
{
int status;
@@ -4506,6 +4648,13 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
if (status < 0)
goto xdr_error;
+ if (label) {
+ status = decode_attr_security_label(xdr, bitmap, label);
+ if (status < 0)
+ goto xdr_error;
+ fattr->valid |= status;
+ }
+
xdr_error:
dprintk("%s: xdr returned %d\n", __func__, -status);
return status;
@@ -4513,7 +4662,7 @@ xdr_error:
static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
- const struct nfs_server *server)
+ struct nfs4_label *label, const struct nfs_server *server)
{
unsigned int savep;
uint32_t attrlen,
@@ -4532,7 +4681,8 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
if (status < 0)
goto xdr_error;
- status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server);
+ status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc,
+ label, server);
if (status < 0)
goto xdr_error;
@@ -4542,10 +4692,16 @@ xdr_error:
return status;
}
+/*
+ * Decode attributes into @fattr and @label through
+ * decode_getfattr_generic(), supplying no file handle and no
+ * fs_locations destination.
+ */
+static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+		struct nfs4_label *label, const struct nfs_server *server)
+{
+	int status;
+
+	status = decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server);
+	return status;
+}
+
static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
const struct nfs_server *server)
{
- return decode_getfattr_generic(xdr, fattr, NULL, NULL, server);
+ return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server);
}
/*
@@ -4555,7 +4711,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
uint32_t *layouttype)
{
- uint32_t *p;
+ __be32 *p;
int num;
p = xdr_inline_decode(xdr, 4);
@@ -4860,11 +5016,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
- status = decode_op_hdr(xdr, OP_OPEN);
- if (status != -EIO)
- nfs_increment_open_seqid(status, res->seqid);
- if (!status)
- status = decode_stateid(xdr, &res->stateid);
+ if (!__decode_op_hdr(xdr, OP_OPEN, &status))
+ return status;
+ nfs_increment_open_seqid(status, res->seqid);
+ if (status)
+ return status;
+ status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
@@ -4930,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_PUTROOTFH);
}
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+ struct nfs_pgio_res *res)
{
__be32 *p;
uint32_t count, eof, recvd;
@@ -5184,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
}
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
int status;
@@ -5209,27 +5367,30 @@ static int decode_delegreturn(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_DELEGRETURN);
}
-static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
+static int decode_secinfo_gss(struct xdr_stream *xdr,
+ struct nfs4_secinfo4 *flavor)
{
+ u32 oid_len;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- flavor->gss.sec_oid4.len = be32_to_cpup(p);
- if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
+ oid_len = be32_to_cpup(p);
+ if (oid_len > GSS_OID_MAX_LEN)
goto out_err;
- p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
+ p = xdr_inline_decode(xdr, oid_len);
if (unlikely(!p))
goto out_overflow;
- memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
+ memcpy(flavor->flavor_info.oid.data, p, oid_len);
+ flavor->flavor_info.oid.len = oid_len;
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- flavor->gss.qop4 = be32_to_cpup(p++);
- flavor->gss.service = be32_to_cpup(p);
+ flavor->flavor_info.qop = be32_to_cpup(p++);
+ flavor->flavor_info.service = be32_to_cpup(p);
return 0;
@@ -5242,10 +5403,10 @@ out_err:
static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
{
- struct nfs4_secinfo_flavor *sec_flavor;
+ struct nfs4_secinfo4 *sec_flavor;
+ unsigned int i, num_flavors;
int status;
__be32 *p;
- int i, num_flavors;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
@@ -5297,6 +5458,23 @@ static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_re
return decode_secinfo_common(xdr, res);
}
+/*
+ * Decode an operation map into @op_map.
+ *
+ * Reads a 32-bit word count followed by that many 32-bit words, which
+ * are stored in op_map->u.words[] in order.
+ *
+ * Returns 0 on success, or -EIO when the count exceeds
+ * NFS4_OP_MAP_NUM_WORDS or the stream is too short.
+ */
+static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+{
+	__be32 *p;
+	uint32_t bitmap_words;
+	unsigned int i;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	bitmap_words = be32_to_cpup(p);
+	if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
+		return -EIO;
+	/* An empty map carries no words, so there is nothing to fetch. */
+	if (bitmap_words != 0) {
+		p = xdr_inline_decode(xdr, 4 * bitmap_words);
+		if (unlikely(!p))
+			goto out_overflow;
+		for (i = 0; i < bitmap_words; i++)
+			op_map->u.words[i] = be32_to_cpup(p++);
+	}
+
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
static int decode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_res *res)
{
@@ -5320,10 +5498,22 @@ static int decode_exchange_id(struct xdr_stream *xdr,
res->seqid = be32_to_cpup(p++);
res->flags = be32_to_cpup(p++);
- /* We ask for SP4_NONE */
- dummy = be32_to_cpup(p);
- if (dummy != SP4_NONE)
+ res->state_protect.how = be32_to_cpup(p);
+ switch (res->state_protect.how) {
+ case SP4_NONE:
+ break;
+ case SP4_MACH_CRED:
+ status = decode_op_map(xdr, &res->state_protect.enforce);
+ if (status)
+ return status;
+ status = decode_op_map(xdr, &res->state_protect.allow);
+ if (status)
+ return status;
+ break;
+ default:
+ WARN_ON_ONCE(1);
return -EIO;
+ }
/* server_owner4.so_minor_id */
p = xdr_inline_decode(xdr, 8);
@@ -5517,6 +5707,8 @@ static int decode_sequence(struct xdr_stream *xdr,
if (res->sr_slot == NULL)
return 0;
+ if (!res->sr_slot->table->session)
+ return 0;
status = decode_op_hdr(xdr, OP_SEQUENCE);
if (!status)
@@ -5835,21 +6027,8 @@ out:
static int decode_free_stateid(struct xdr_stream *xdr,
struct nfs41_free_stateid_res *res)
{
- __be32 *p;
- int status;
-
- status = decode_op_hdr(xdr, OP_FREE_STATEID);
- if (status)
- return status;
-
- p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p))
- goto out_overflow;
- res->status = be32_to_cpup(p++);
+ res->status = decode_op_hdr(xdr, OP_FREE_STATEID);
return res->status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}
#endif /* CONFIG_NFS_V4_1 */
@@ -5934,7 +6113,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_getfh(xdr, res->fh);
if (status)
goto out;
- status = decode_getfattr(xdr, res->fattr, res->server);
+ status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
out:
return status;
}
@@ -5960,7 +6139,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
goto out;
status = decode_getfh(xdr, res->fh);
if (status == 0)
- status = decode_getfattr(xdr, res->fattr, res->server);
+ status = decode_getfattr_label(xdr, res->fattr,
+ res->label, res->server);
out:
return status;
}
@@ -6051,7 +6231,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_restorefh(xdr);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server);
+ decode_getfattr_label(xdr, res->fattr, res->label, res->server);
out:
return status;
}
@@ -6080,7 +6260,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_getfh(xdr, res->fh);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server);
+ decode_getfattr_label(xdr, res->fattr, res->label, res->server);
out:
return status;
}
@@ -6112,7 +6292,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getfattr(xdr, res->fattr, res->server);
+ status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
out:
return status;
}
@@ -6245,7 +6425,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
goto out;
if (res->access_request)
decode_access(xdr, &res->access_supported, &res->access_result);
- decode_getfattr(xdr, res->f_attr, res->server);
+ decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server);
out:
return status;
}
@@ -6322,7 +6502,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
status = decode_setattr(xdr);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server);
+ decode_getfattr_label(xdr, res->fattr, res->label, res->server);
out:
return status;
}
@@ -6459,7 +6639,7 @@ out:
* Decode Read response
*/
static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_readres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
@@ -6484,7 +6664,7 @@ out:
* Decode WRITE response
*/
static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
@@ -6648,8 +6828,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
* Decode SETCLIENTID_CONFIRM response
*/
static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
- struct xdr_stream *xdr,
- struct nfs_fsinfo *fsinfo)
+ struct xdr_stream *xdr)
{
struct compound_hdr hdr;
int status;
@@ -6657,10 +6836,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
status = decode_compound_hdr(xdr, &hdr);
if (!status)
status = decode_setclientid_confirm(xdr);
- if (!status)
- status = decode_putrootfh(xdr);
- if (!status)
- status = decode_fsinfo(xdr, fsinfo);
return status;
}
@@ -6710,13 +6885,26 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_lookup(xdr);
- if (status)
- goto out;
- xdr_enter_page(xdr, PAGE_SIZE);
- status = decode_getfattr_generic(xdr, &res->fs_locations->fattr,
+ if (res->migration) {
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+ &res->fs_locations->fattr,
NULL, res->fs_locations,
- res->fs_locations->server);
+ NULL, res->fs_locations->server);
+ if (status)
+ goto out;
+ if (res->renew)
+ status = decode_renew(xdr);
+ } else {
+ status = decode_lookup(xdr);
+ if (status)
+ goto out;
+ xdr_enter_page(xdr, PAGE_SIZE);
+ status = decode_getfattr_generic(xdr,
+ &res->fs_locations->fattr,
+ NULL, res->fs_locations,
+ NULL, res->fs_locations->server);
+ }
out:
return status;
}
@@ -6745,6 +6933,34 @@ out:
return status;
}
+/*
+ * Decode FSID_PRESENT response
+ *
+ * Decodes, in order, the compound header, SEQUENCE, PUTFH and GETFH
+ * (into res->fh), then RENEW when res->renew is set.  Decoding stops
+ * at the first non-zero status, which is returned.
+ */
+static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs4_fsid_present_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (!status)
+		status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (!status)
+		status = decode_putfh(xdr);
+	if (!status)
+		status = decode_getfh(xdr, res->fh);
+	if (!status && res->renew)
+		status = decode_renew(xdr);
+	return status;
+}
+
#if defined(CONFIG_NFS_V4_1)
/*
* Decode BIND_CONN_TO_SESSION response
@@ -7129,7 +7345,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
goto out_overflow;
if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
- NULL, entry->server) < 0)
+ NULL, entry->label, entry->server) < 0)
goto out_overflow;
if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
entry->ino = entry->fattr->mounted_on_fileid;
@@ -7259,6 +7475,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
PROC(SECINFO, enc_secinfo, dec_secinfo),
+ PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present),
#if defined(CONFIG_NFS_V4_1)
PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
PROC(CREATE_SESSION, enc_create_session, dec_create_session),
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c
new file mode 100644
index 00000000000..4eb0aead69b
--- /dev/null
+++ b/fs/nfs/nfstrace.c
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/nfs_fs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+#include "nfstrace.h"
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
new file mode 100644
index 00000000000..59f838cdc00
--- /dev/null
+++ b/fs/nfs/nfstrace.h
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nfs
+
+#if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NFS_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Translate a DT_* directory entry type code into its symbolic name
+ * for use as a "%s" argument inside TP_printk().
+ */
+#define nfs_show_file_type(ftype) \
+ __print_symbolic(ftype, \
+ { DT_UNKNOWN, "UNKNOWN" }, \
+ { DT_FIFO, "FIFO" }, \
+ { DT_CHR, "CHR" }, \
+ { DT_DIR, "DIR" }, \
+ { DT_BLK, "BLK" }, \
+ { DT_REG, "REG" }, \
+ { DT_LNK, "LNK" }, \
+ { DT_SOCK, "SOCK" }, \
+ { DT_WHT, "WHT" })
+
+/*
+ * Render a cache_validity bitmask (NFS_INO_INVALID_* / NFS_INO_REVAL_*
+ * flags) as a "|"-separated list of flag names for TP_printk().
+ */
+#define nfs_show_cache_validity(v) \
+ __print_flags(v, "|", \
+ { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \
+ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \
+ { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \
+ { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \
+ { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \
+ { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \
+ { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \
+ { NFS_INO_INVALID_LABEL, "INVALID_LABEL" })
+
+/*
+ * Render the nfs_inode flags word as a "|"-separated list of names for
+ * TP_printk().
+ *
+ * The NFS_INO_* constants used here are bit numbers, not masks, so each
+ * one is shifted into place.  The shift is done on an unsigned long
+ * (1UL) so the mask has the same type as the flag word being decoded
+ * and stays well defined for any bit position in that word.
+ */
+#define nfs_show_nfsi_flags(v) \
+	__print_flags(v, "|", \
+			{ 1UL << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
+			{ 1UL << NFS_INO_STALE, "STALE" }, \
+			{ 1UL << NFS_INO_INVALIDATING, "INVALIDATING" }, \
+			{ 1UL << NFS_INO_FLUSHING, "FLUSHING" }, \
+			{ 1UL << NFS_INO_FSCACHE, "FSCACHE" }, \
+			{ 1UL << NFS_INO_COMMIT, "COMMIT" }, \
+			{ 1UL << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
+			{ 1UL << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
+
+/*
+ * Event class shared by the per-inode "*_enter" tracepoints.
+ *
+ * Captures the superblock device number, the NFS fileid, a hash of the
+ * file handle and the inode's i_version, and prints them as
+ * "fileid=<major>:<minor>:<fileid> fhandle=... version=...".
+ */
+DECLARE_EVENT_CLASS(nfs_inode_event,
+ TP_PROTO(
+ const struct inode *inode
+ ),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u64, version)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ /* only a 32-bit hash of the file handle is recorded */
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->version = inode->i_version;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (unsigned long long)__entry->version
+ )
+);
+
+/*
+ * Event class shared by the per-inode "*_exit" tracepoints.
+ *
+ * In addition to the identity fields recorded on entry (device, fileid,
+ * file handle hash, i_version) this captures the operation's result
+ * code, the file type derived from i_mode, the current i_size and the
+ * nfs_inode flags / cache_validity words, so the state of the inode
+ * after the operation can be seen in the trace.
+ *
+ * nfsi_flags is an unsigned long bitmask and is therefore printed with
+ * %lu, matching cache_validity.
+ */
+DECLARE_EVENT_CLASS(nfs_inode_event_done,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(unsigned char, type)
+			__field(u64, fileid)
+			__field(u64, version)
+			__field(loff_t, size)
+			__field(unsigned long, nfsi_flags)
+			__field(unsigned long, cache_validity)
+		),
+
+		TP_fast_assign(
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			__entry->error = error;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->type = nfs_umode_to_dtype(inode->i_mode);
+			__entry->version = inode->i_version;
+			__entry->size = i_size_read(inode);
+			__entry->nfsi_flags = nfsi->flags;
+			__entry->cache_validity = nfsi->cache_validity;
+		),
+
+		TP_printk(
+			"error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"type=%u (%s) version=%llu size=%lld "
+			"cache_validity=%lu (%s) nfs_flags=%lu (%s)",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			__entry->type,
+			nfs_show_file_type(__entry->type),
+			(unsigned long long)__entry->version,
+			(long long)__entry->size,
+			__entry->cache_validity,
+			nfs_show_cache_validity(__entry->cache_validity),
+			__entry->nfsi_flags,
+			nfs_show_nfsi_flags(__entry->nfsi_flags)
+		)
+);
+
+/* Instantiate an nfs_inode_event tracepoint called @name. */
+#define DEFINE_NFS_INODE_EVENT(name) \
+ DEFINE_EVENT(nfs_inode_event, name, \
+ TP_PROTO( \
+ const struct inode *inode \
+ ), \
+ TP_ARGS(inode))
+/* Instantiate an nfs_inode_event_done tracepoint called @name. */
+#define DEFINE_NFS_INODE_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_inode_event_done, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ int error \
+ ), \
+ TP_ARGS(inode, error))
+/* Each traced inode operation gets an _enter/_exit tracepoint pair. */
+DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit);
+DEFINE_NFS_INODE_EVENT(nfs_getattr_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit);
+DEFINE_NFS_INODE_EVENT(nfs_setattr_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit);
+DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit);
+DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
+DEFINE_NFS_INODE_EVENT(nfs_access_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
+
+/*
+ * Render a LOOKUP_* bitmask as a "|"-separated list of names for
+ * TP_printk().  The argument is parenthesised before the cast so that
+ * the cast applies to the whole expression passed in, not just to its
+ * first operand.
+ */
+#define show_lookup_flags(flags) \
+	__print_flags((unsigned long)(flags), "|", \
+			{ LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
+			{ LOOKUP_DIRECTORY, "DIRECTORY" }, \
+			{ LOOKUP_OPEN, "OPEN" }, \
+			{ LOOKUP_CREATE, "CREATE" }, \
+			{ LOOKUP_EXCL, "EXCL" })
+
+/*
+ * Event class for the entry side of lookup-style operations.
+ *
+ * Records the caller's lookup flags, the device and fileid of the
+ * parent directory and a copy of the dentry name, printed as
+ * "name=<major>:<minor>:<dir fileid>/<name>".
+ */
+DECLARE_EVENT_CLASS(nfs_lookup_event,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry,
+ unsigned int flags
+ ),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, flags)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "flags=%u (%s) name=%02x:%02x:%llu/%s",
+ __entry->flags,
+ show_lookup_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/* Instantiate an nfs_lookup_event tracepoint called @name. */
+#define DEFINE_NFS_LOOKUP_EVENT(name) \
+ DEFINE_EVENT(nfs_lookup_event, name, \
+ TP_PROTO( \
+ const struct inode *dir, \
+ const struct dentry *dentry, \
+ unsigned int flags \
+ ), \
+ TP_ARGS(dir, dentry, flags))
+
+/*
+ * Event class for the exit side of lookup-style operations.
+ *
+ * Same fields as nfs_lookup_event plus the operation's result code,
+ * which is printed first as "error=<n>".
+ */
+DECLARE_EVENT_CLASS(nfs_lookup_event_done,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry,
+ unsigned int flags,
+ int error
+ ),
+
+ TP_ARGS(dir, dentry, flags, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(unsigned int, flags)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->error = error;
+ __entry->flags = flags;
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
+ __entry->error,
+ __entry->flags,
+ show_lookup_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/* Instantiate an nfs_lookup_event_done tracepoint called @name. */
+#define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_lookup_event_done, name, \
+ TP_PROTO( \
+ const struct inode *dir, \
+ const struct dentry *dentry, \
+ unsigned int flags, \
+ int error \
+ ), \
+ TP_ARGS(dir, dentry, flags, error))
+
+/* _enter/_exit tracepoint pairs for lookup and lookup revalidation */
+DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter);
+DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
+DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
+DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
+
+/*
+ * Render a subset of the O_* open flags as a "|"-separated list of
+ * names for TP_printk().  The argument is parenthesised before the
+ * cast so that the cast applies to the whole expression passed in.
+ */
+#define show_open_flags(flags) \
+	__print_flags((unsigned long)(flags), "|", \
+			{ O_CREAT, "O_CREAT" }, \
+			{ O_EXCL, "O_EXCL" }, \
+			{ O_TRUNC, "O_TRUNC" }, \
+			{ O_APPEND, "O_APPEND" }, \
+			{ O_DSYNC, "O_DSYNC" }, \
+			{ O_DIRECT, "O_DIRECT" }, \
+			{ O_DIRECTORY, "O_DIRECTORY" })
+
+/*
+ * Render the READ/WRITE/EXEC bits of an fmode value as a "|"-separated
+ * list of names for TP_printk().  The FMODE_* constants are converted
+ * to unsigned long with __force casts to build the mask table.
+ */
+#define show_fmode_flags(mode) \
+ __print_flags(mode, "|", \
+ { ((__force unsigned long)FMODE_READ), "READ" }, \
+ { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+ { ((__force unsigned long)FMODE_EXEC), "EXEC" })
+
+/*
+ * Tracepoint for the start of an atomic open.
+ *
+ * Records the open flags, the fmode taken from the open context, the
+ * device and fileid of the parent directory, and the name of the
+ * dentry referenced by the open context.
+ */
+TRACE_EVENT(nfs_atomic_open_enter,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct nfs_open_context *ctx,
+ unsigned int flags
+ ),
+
+ TP_ARGS(dir, ctx, flags),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, flags)
+ __field(unsigned int, fmode)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, ctx->dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+ /* ctx->mode is stored as a plain unsigned int */
+ __entry->fmode = (__force unsigned int)ctx->mode;
+ __assign_str(name, ctx->dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s",
+ __entry->flags,
+ show_open_flags(__entry->flags),
+ show_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/*
+ * Tracepoint for the completion of an atomic open.
+ *
+ * Records the same fields as nfs_atomic_open_enter plus the result
+ * code, which is printed first as "error=<n>".
+ */
+TRACE_EVENT(nfs_atomic_open_exit,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct nfs_open_context *ctx,
+ unsigned int flags,
+ int error
+ ),
+
+ TP_ARGS(dir, ctx, flags, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(unsigned int, flags)
+ __field(unsigned int, fmode)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, ctx->dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+ /* ctx->mode is stored as a plain unsigned int */
+ __entry->fmode = (__force unsigned int)ctx->mode;
+ __assign_str(name, ctx->dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "error=%d flags=%u (%s) fmode=%s "
+ "name=%02x:%02x:%llu/%s",
+ __entry->error,
+ __entry->flags,
+ show_open_flags(__entry->flags),
+ show_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/*
+ * Tracepoint for the start of a create operation.
+ *
+ * Records the flags passed by the caller (decoded with
+ * show_open_flags), the device and fileid of the parent directory and
+ * the name of the dentry being created.
+ */
+TRACE_EVENT(nfs_create_enter,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry,
+ unsigned int flags
+ ),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, flags)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "flags=%u (%s) name=%02x:%02x:%llu/%s",
+ __entry->flags,
+ show_open_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/*
+ * Tracepoint for the completion of a create operation.
+ *
+ * Records the same fields as nfs_create_enter plus the result code,
+ * which is printed first as "error=<n>".
+ */
+TRACE_EVENT(nfs_create_exit,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry,
+ unsigned int flags,
+ int error
+ ),
+
+ TP_ARGS(dir, dentry, flags, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(unsigned int, flags)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
+ __entry->error,
+ __entry->flags,
+ show_open_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/*
+ * Event class for the entry side of directory-modifying operations
+ * that are identified by a parent directory and a dentry name.
+ *
+ * Records the device and fileid of the parent directory and a copy of
+ * the dentry name, printed as "name=<major>:<minor>:<dir fileid>/<name>".
+ */
+DECLARE_EVENT_CLASS(nfs_directory_event,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry
+ ),
+
+ TP_ARGS(dir, dentry),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "name=%02x:%02x:%llu/%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/* Instantiate an nfs_directory_event tracepoint called @name. */
+#define DEFINE_NFS_DIRECTORY_EVENT(name) \
+ DEFINE_EVENT(nfs_directory_event, name, \
+ TP_PROTO( \
+ const struct inode *dir, \
+ const struct dentry *dentry \
+ ), \
+ TP_ARGS(dir, dentry))
+
+/*
+ * Event class for the exit side of directory-modifying operations.
+ *
+ * Same fields as nfs_directory_event plus the operation's result code,
+ * which is printed first as "error=<n>".
+ */
+DECLARE_EVENT_CLASS(nfs_directory_event_done,
+ TP_PROTO(
+ const struct inode *dir,
+ const struct dentry *dentry,
+ int error
+ ),
+
+ TP_ARGS(dir, dentry, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->error = error;
+ __assign_str(name, dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "error=%d name=%02x:%02x:%llu/%s",
+ __entry->error,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+
+/* Instantiate an nfs_directory_event_done tracepoint called @name. */
+#define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_directory_event_done, name, \
+ TP_PROTO( \
+ const struct inode *dir, \
+ const struct dentry *dentry, \
+ int error \
+ ), \
+ TP_ARGS(dir, dentry, error))
+
+/* _enter/_exit tracepoint pairs for each directory operation */
+DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit);
+
+/*
+ * Tracepoint for the start of a hard-link operation.
+ *
+ * Records the fileid of the inode being linked, the fileid of the
+ * target directory and the new name.  Only one device number is
+ * stored (taken from @inode's superblock); it is printed for both the
+ * inode and the directory.
+ *
+ * The u64 fileid is cast to unsigned long long for %llu, as is done
+ * for every other u64 field printed in this file.
+ */
+TRACE_EVENT(nfs_link_enter,
+		TP_PROTO(
+			const struct inode *inode,
+			const struct inode *dir,
+			const struct dentry *dentry
+		),
+
+		TP_ARGS(inode, dir, dentry),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dir = NFS_FILEID(dir);
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+/*
+ * Tracepoint for the completion of a hard-link operation.
+ *
+ * Records the same fields as nfs_link_enter plus the result code,
+ * which is printed first as "error=<n>".  Only one device number is
+ * stored (taken from @inode's superblock); it is printed for both the
+ * inode and the directory.
+ *
+ * The u64 fileid is cast to unsigned long long for %llu, as is done
+ * for every other u64 field printed in this file.
+ */
+TRACE_EVENT(nfs_link_exit,
+		TP_PROTO(
+			const struct inode *inode,
+			const struct inode *dir,
+			const struct dentry *dentry,
+			int error
+		),
+
+		TP_ARGS(inode, dir, dentry, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+/*
+ * Event class for the entry side of rename operations.
+ *
+ * Records the fileids of the old and new parent directories and copies
+ * of both names.  Only one device number is stored (taken from
+ * @old_dir's superblock); it is printed for both the old and the new
+ * path.
+ */
+DECLARE_EVENT_CLASS(nfs_rename_event,
+ TP_PROTO(
+ const struct inode *old_dir,
+ const struct dentry *old_dentry,
+ const struct inode *new_dir,
+ const struct dentry *new_dentry
+ ),
+
+ TP_ARGS(old_dir, old_dentry, new_dir, new_dentry),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, old_dir)
+ __field(u64, new_dir)
+ __string(old_name, old_dentry->d_name.name)
+ __string(new_name, new_dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = old_dir->i_sb->s_dev;
+ __entry->old_dir = NFS_FILEID(old_dir);
+ __entry->new_dir = NFS_FILEID(new_dir);
+ __assign_str(old_name, old_dentry->d_name.name);
+ __assign_str(new_name, new_dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->old_dir,
+ __get_str(old_name),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->new_dir,
+ __get_str(new_name)
+ )
+);
+/* Instantiate an nfs_rename_event tracepoint called @name. */
+#define DEFINE_NFS_RENAME_EVENT(name) \
+ DEFINE_EVENT(nfs_rename_event, name, \
+ TP_PROTO( \
+ const struct inode *old_dir, \
+ const struct dentry *old_dentry, \
+ const struct inode *new_dir, \
+ const struct dentry *new_dentry \
+ ), \
+ TP_ARGS(old_dir, old_dentry, new_dir, new_dentry))
+
+/*
+ * Event class for the exit side of rename operations.
+ *
+ * Same fields as nfs_rename_event plus the operation's result code,
+ * which is printed first as "error=<n>".  Only one device number is
+ * stored (taken from @old_dir's superblock); it is printed for both
+ * the old and the new path.
+ */
+DECLARE_EVENT_CLASS(nfs_rename_event_done,
+ TP_PROTO(
+ const struct inode *old_dir,
+ const struct dentry *old_dentry,
+ const struct inode *new_dir,
+ const struct dentry *new_dentry,
+ int error
+ ),
+
+ TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, error)
+ __field(u64, old_dir)
+ __string(old_name, old_dentry->d_name.name)
+ __field(u64, new_dir)
+ __string(new_name, new_dentry->d_name.name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = old_dir->i_sb->s_dev;
+ __entry->old_dir = NFS_FILEID(old_dir);
+ __entry->new_dir = NFS_FILEID(new_dir);
+ __entry->error = error;
+ __assign_str(old_name, old_dentry->d_name.name);
+ __assign_str(new_name, new_dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "error=%d old_name=%02x:%02x:%llu/%s "
+ "new_name=%02x:%02x:%llu/%s",
+ __entry->error,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->old_dir,
+ __get_str(old_name),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->new_dir,
+ __get_str(new_name)
+ )
+);
+/* Instantiate an nfs_rename_event_done tracepoint called @name. */
+#define DEFINE_NFS_RENAME_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_rename_event_done, name, \
+ TP_PROTO( \
+ const struct inode *old_dir, \
+ const struct dentry *old_dentry, \
+ const struct inode *new_dir, \
+ const struct dentry *new_dentry, \
+ int error \
+ ), \
+ TP_ARGS(old_dir, old_dentry, new_dir, \
+ new_dentry, error))
+
+DEFINE_NFS_RENAME_EVENT(nfs_rename_enter);
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit);
+
+/* nfs_sillyrename_rename has no _enter counterpart; it uses the "done" class */
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename);
+
+/*
+ * Tracepoint for the unlink step of a sillyrename, recording the
+ * result code, the parent directory (device and fileid) and the name
+ * being removed.
+ *
+ * The name comes from data->args.name as a (pointer, length) pair, so
+ * it is copied into a dynamic array of len + 1 bytes and terminated
+ * explicitly rather than going through __string()/__assign_str().
+ * NOTE(review): presumably the source name is not guaranteed to be
+ * NUL-terminated - confirm against the nfs_unlinkdata producers.
+ */
+TRACE_EVENT(nfs_sillyrename_unlink,
+ TP_PROTO(
+ const struct nfs_unlinkdata *data,
+ int error
+ ),
+
+ TP_ARGS(data, error),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, error)
+ __field(u64, dir)
+ __dynamic_array(char, name, data->args.name.len + 1)
+ ),
+
+ TP_fast_assign(
+ struct inode *dir = data->dir;
+ size_t len = data->args.name.len;
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->error = error;
+ memcpy(__get_dynamic_array(name),
+ data->args.name.name, len);
+ /* terminate the copy so TP_printk can treat it as a C string */
+ ((char *)__get_dynamic_array(name))[len] = 0;
+ ),
+
+ TP_printk(
+ "error=%d name=%02x:%02x:%llu/%s",
+ __entry->error,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+ )
+);
+#endif /* _TRACE_NFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE nfstrace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f8..611320753db 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -234,7 +234,7 @@ static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
lseg = kzalloc(lseg_size, gfp_flags);
if (unlikely(!lseg)) {
- dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
+ dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
numdevs, lseg_size);
return -ENOMEM;
}
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync);
}
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
- struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct nfs_pgio_data *wdata = objios->oir.rpcdata;
struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page,
};
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
{
struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
return 0;
}
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req)
{
- if (!pnfs_generic_pg_test(pgio, prev, req))
- return false;
+ unsigned int size;
+
+ size = pnfs_generic_pg_test(pgio, prev, req);
+
+ if (!size || pgio->pg_count + req->wb_bytes >
+ (unsigned long)pgio->pg_layout_private)
+ return 0;
- return pgio->pg_count + req->wb_bytes <=
- (unsigned long)pgio->pg_layout_private;
+ return min(size, req->wb_bytes);
}
static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
@@ -647,6 +655,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR,
+ .owner = THIS_MODULE,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index a9ebd817278..765d3f54e98 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct objlayout *objlay;
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
- if (objlay) {
- spin_lock_init(&objlay->lock);
- INIT_LIST_HEAD(&objlay->err_list);
- }
+ if (!objlay)
+ return NULL;
+ spin_lock_init(&objlay->lock);
+ INIT_LIST_HEAD(&objlay->err_list);
dprintk("%s: Return %p\n", __func__, objlay);
return &objlay->pnfs_layout;
}
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_read_data *rdata = oir->rpcdata;
+ struct nfs_pgio_data *rdata = oir->rpcdata;
oir->status = rdata->task.tk_status = status;
if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads.
*/
enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_write_done(wdata);
}
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_write_data *wdata = oir->rpcdata;
+ struct nfs_pgio_data *wdata = oir->rpcdata;
oir->status = wdata->task.tk_status = status;
if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes.
*/
enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
int how)
{
struct nfs_pgio_header *hdr = wdata->header;
@@ -613,8 +613,10 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
pd.pgbase = 0;
pd.pglen = PAGE_SIZE;
pd.mincount = 0;
+ pd.maxcount = PAGE_SIZE;
- err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
+ err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd,
+ pnfslay->plh_lc_cred);
dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
if (err)
goto err_out;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 880ba086be9..01e041029a6 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -114,13 +114,13 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
gfp_t gfp_flags);
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
-/* objio_free_result will free these @oir structs recieved from
+/* objio_free_result will free these @oir structs received from
* objlayout_{read,write}_done
*/
extern void objio_free_result(struct objlayout_io_res *oir);
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
/*
* callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_read_data *);
+ struct nfs_pgio_data *);
extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_write_data *,
+ struct nfs_pgio_data *,
int how);
extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e56e846e9d2..17fab89f635 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,12 @@
#include "internal.h"
#include "pnfs.h"
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
static struct kmem_cache *nfs_page_cachep;
+static const struct rpc_call_ops nfs_pgio_common_ops;
-bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -84,11 +87,217 @@ nfs_page_free(struct nfs_page *p)
kmem_cache_free(nfs_page_cachep, p);
}
+/*
+ * nfs_iocounter_inc - account for one more outstanding I/O request
+ * @c: counter to bump
+ */
+static void nfs_iocounter_inc(struct nfs_io_counter *c)
+{
+	atomic_inc(&c->io_count);
+}
+
+/*
+ * nfs_iocounter_dec - drop one outstanding I/O request
+ * @c: counter to decrement
+ *
+ * When the count reaches zero, NFS_IO_INPROGRESS is cleared and anyone
+ * sleeping on that bit is woken.
+ */
+static void nfs_iocounter_dec(struct nfs_io_counter *c)
+{
+	/* nothing to do while other requests are still outstanding */
+	if (!atomic_dec_and_test(&c->io_count))
+		return;
+
+	clear_bit(NFS_IO_INPROGRESS, &c->flags);
+	/* order the bit clear before the wakeup */
+	smp_mb__after_atomic();
+	wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
+}
+
+/*
+ * __nfs_iocounter_wait - sleep until the I/O count drops to zero
+ * @c: counter to wait on
+ *
+ * Slow path of nfs_iocounter_wait().  The task queues itself on the
+ * NFS_IO_INPROGRESS bit waitqueue in TASK_KILLABLE state, sets the bit
+ * so that nfs_iocounter_dec() has something to clear and wake on, and
+ * re-checks the count before actually sleeping.
+ *
+ * Returns 0 once io_count has reached zero, or the non-zero value
+ * returned by nfs_wait_bit_killable() if the wait was interrupted.
+ * The loop stops as soon as the wait reports an error; otherwise a
+ * task with a fatal signal pending would keep re-entering the wait
+ * and spin until the outstanding I/O finally completed.
+ */
+static int
+__nfs_iocounter_wait(struct nfs_io_counter *c)
+{
+	wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS);
+	DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS);
+	int ret = 0;
+
+	do {
+		prepare_to_wait(wq, &q.wait, TASK_KILLABLE);
+		set_bit(NFS_IO_INPROGRESS, &c->flags);
+		/* re-check after queueing so a concurrent wakeup is not lost */
+		if (atomic_read(&c->io_count) == 0)
+			break;
+		ret = nfs_wait_bit_killable(&c->flags);
+	} while (atomic_read(&c->io_count) != 0 && !ret);
+	finish_wait(wq, &q.wait);
+	return ret;
+}
+
+/**
+ * nfs_iocounter_wait - block until all counted I/O has finished
+ * @c: nfs_io_counter to wait on
+ *
+ * Returns 0 immediately when no I/O is outstanding.  Otherwise sleeps
+ * in __nfs_iocounter_wait() and returns its result: 0 once io_count
+ * reaches 0, or -ERESTARTSYS if interrupted by a fatal signal.
+ */
+int nfs_iocounter_wait(struct nfs_io_counter *c)
+{
+	if (atomic_read(&c->io_count) != 0)
+		return __nfs_iocounter_wait(c);
+	return 0;
+}
+
+/*
+ * Wait action passed to wait_on_bit_lock() by nfs_page_group_lock():
+ * yields the CPU via io_schedule() and always returns 0.  @word is
+ * unused.
+ */
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ *
+ * The lock is the PG_HEADLOCK bit in the head request's wb_flags; the
+ * caller sleeps in TASK_UNINTERRUPTIBLE state until it is acquired.
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ /* sanity check: a head request must point at itself */
+ WARN_ON_ONCE(head != head->wb_head);
+
+ wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ *
+ * Clears PG_HEADLOCK on the head request and wakes any task waiting
+ * for it in nfs_page_group_lock().
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ /* sanity check: a head request must point at itself */
+ WARN_ON_ONCE(head != head->wb_head);
+
+ /* barriers on both sides of the clear, then wake the waiters */
+ smp_mb__before_atomic();
+ clear_bit(PG_HEADLOCK, &head->wb_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ *
+ * Sets @bit on @req, then walks the circular wb_this_page list.
+ * Returns false as soon as a member without @bit is found (leaving
+ * @bit set on @req).  If every member has @bit set, the bit is cleared
+ * on all of them and true is returned.
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+ struct nfs_page *head = req->wb_head;
+ struct nfs_page *tmp;
+
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+ /* this both sets the bit on @req and warns if it was already set */
+ WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+ /* check every other member of the group */
+ tmp = req->wb_this_page;
+ while (tmp != req) {
+ if (!test_bit(bit, &tmp->wb_flags))
+ return false;
+ tmp = tmp->wb_this_page;
+ }
+
+ /* true! reset all bits */
+ tmp = req;
+ do {
+ clear_bit(bit, &tmp->wb_flags);
+ tmp = tmp->wb_this_page;
+ } while (tmp != req);
+
+ return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - mark @bit on @req and report whether the
+ *			       whole page group has now reached it
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ *
+ * Takes the page group lock around the locked variant and returns its
+ * result: true only when @bit is set for all requests in the group.
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+	bool all_set;
+
+	nfs_page_group_lock(req);
+	all_set = nfs_page_group_sync_on_bit_locked(req, bit);
+	nfs_page_group_unlock(req);
+
+	return all_set;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ * or only request in the group (the head).
+ *
+ * A head request forms a one-element circular list pointing at itself.
+ * A subrequest is inserted directly after @prev; the checks below
+ * expect @prev to be the last member (its wb_this_page is the head)
+ * and the group's PG_HEADLOCK to be held.
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+ WARN_ON_ONCE(prev == req);
+
+ if (!prev) {
+ /* a head request */
+ req->wb_head = req;
+ req->wb_this_page = req;
+ } else {
+ /* a subrequest */
+ WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+ /* splice @req into the circular list right after @prev */
+ req->wb_head = prev->wb_head;
+ req->wb_this_page = prev->wb_this_page;
+ prev->wb_this_page = req;
+
+ /* All subrequests take a ref on the head request until
+ * nfs_page_group_destroy is called */
+ kref_get(&req->wb_head->wb_kref);
+
+ /* grab extra ref if head request has extra ref from
+ * the write/commit path to handle handoff between write
+ * and commit lists */
+ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+ set_bit(PG_INODE_REF, &req->wb_flags);
+ kref_get(&req->wb_kref);
+ }
+ }
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @kref - the wb_kref embedded in the request that no longer needs the
+ * page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ *
+ * Members synchronise on PG_TEARDOWN: every caller but the last
+ * returns early, and the last one unlinks and frees each request in
+ * the group.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ struct nfs_page *tmp, *next;
+
+ /* subrequests must release the ref on the head request */
+ if (req->wb_head != req)
+ nfs_release_request(req->wb_head);
+
+ /* not every member has been torn down yet - nothing more to do */
+ if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+ return;
+
+ tmp = req;
+ do {
+ /* remember the successor before the links are reset */
+ next = tmp->wb_this_page;
+ /* unlink and free */
+ tmp->wb_this_page = tmp;
+ tmp->wb_head = tmp;
+ nfs_free_request(tmp);
+ tmp = next;
+ } while (tmp != req);
+}
+
/**
* nfs_create_request - Create an NFS read/write request.
* @ctx: open context to use
- * @inode: inode to which the request is attached
* @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
* @offset: starting offset within the page for the write
* @count: number of bytes to read/write
*
@@ -97,13 +306,15 @@ nfs_page_free(struct nfs_page *p)
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page,
- unsigned int offset, unsigned int count)
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+ struct nfs_page *last, unsigned int offset,
+ unsigned int count)
{
struct nfs_page *req;
struct nfs_lock_context *l_ctx;
+ if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
+ return ERR_PTR(-EBADF);
/* try to allocate the request struct */
req = nfs_page_alloc();
if (req == NULL)
@@ -116,6 +327,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
return ERR_CAST(l_ctx);
}
req->wb_lock_context = l_ctx;
+ nfs_iocounter_inc(&l_ctx->io_count);
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
@@ -128,6 +340,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
+ nfs_page_group_init(req, last);
return req;
}
@@ -141,9 +354,9 @@ void nfs_unlock_request(struct nfs_page *req)
printk(KERN_ERR "NFS: Invalid unlock attempted\n");
BUG();
}
- smp_mb__before_clear_bit();
+ smp_mb__before_atomic();
clear_bit(PG_BUSY, &req->wb_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&req->wb_flags, PG_BUSY);
}
@@ -175,6 +388,7 @@ static void nfs_clear_request(struct nfs_page *req)
req->wb_page = NULL;
}
if (l_ctx != NULL) {
+ nfs_iocounter_dec(&l_ctx->io_count);
nfs_put_lock_context(l_ctx);
req->wb_lock_context = NULL;
}
@@ -184,16 +398,22 @@ static void nfs_clear_request(struct nfs_page *req)
}
}
-
/**
* nfs_release_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
*/
-static void nfs_free_request(struct kref *kref)
+void nfs_free_request(struct nfs_page *req)
{
- struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
+ /* extra debug: make sure no sync bits are still set */
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
/* Release struct file and open context */
nfs_clear_request(req);
@@ -202,13 +422,7 @@ static void nfs_free_request(struct kref *kref)
void nfs_release_request(struct nfs_page *req)
{
- kref_put(&req->wb_kref, nfs_free_request);
-}
-
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
+ kref_put(&req->wb_kref, nfs_page_group_destroy);
}
/**
@@ -226,22 +440,249 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
+ * the number of bytes of @req (at most @req->wb_bytes) that can be coalesced.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *prev, struct nfs_page *req)
{
- /*
- * FIXME: ideally we should be able to coalesce all requests
- * that are not block boundary aligned, but currently this
- * is problematic for the case of bsize < PAGE_CACHE_SIZE,
- * since nfs_flush_multi and nfs_pagein_multi assume you
- * can have only one struct nfs_page.
- */
- if (desc->pg_bsize < PAGE_SIZE)
+ if (desc->pg_count > desc->pg_bsize) {
+ /* should never happen */
+ WARN_ON_ONCE(1);
return 0;
+ }
- return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+ return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
+static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+{
+ return container_of(hdr, struct nfs_rw_header, header);
+}
+
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+ struct nfs_rw_header *header = ops->rw_alloc_header();
+
+ if (header) {
+ struct nfs_pgio_header *hdr = &header->header;
+
+ INIT_LIST_HEAD(&hdr->pages);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
+ hdr->rw_ops = ops;
+ }
+ return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+ hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
+/**
+ * nfs_pgio_data_alloc - Allocate pageio data
+ * @hdr: The header making a request
+ * @pagecount: Number of pages to create
+ */
+static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_pgio_data *data, *prealloc;
+
+ prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
+ }
+out:
+ return data;
+}
+
+/**
+ * nfs_pgio_data_release - Properly free pageio data
+ * @data: The data to release
+ */
+void nfs_pgio_data_release(struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
+
+ put_nfs_open_context(data->args.context);
+ if (data->pages.pagevec != data->pages.page_array)
+ kfree(data->pages.pagevec);
+ if (data == &pageio_header->rpc_data) {
+ data->header = NULL;
+ data = NULL;
+ }
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ /* Note: we only free the rpc_task after callbacks are done.
+ * See the comment in rpc_free_task() for why
+ */
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+
+/**
+ * nfs_pgio_rpcsetup - Set up arguments for a pageio call
+ * @data: The pageio data
+ * @count: Number of bytes to read or write
+ * @offset: Initial offset
+ * @how: How to commit data (writes only)
+ * @cinfo: Commit information for the call (writes only)
+ */
+static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+ unsigned int count, unsigned int offset,
+ int how, struct nfs_commit_info *cinfo)
+{
+ struct nfs_page *req = data->header->req;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ data->args.fh = NFS_FH(data->header->inode);
+ data->args.offset = req_offset(req) + offset;
+ /* pnfs_set_layoutcommit needs this */
+ data->mds_offset = data->args.offset;
+ data->args.pgbase = req->wb_pgbase + offset;
+ data->args.pages = data->pages.pagevec;
+ data->args.count = count;
+ data->args.context = get_nfs_open_context(req->wb_context);
+ data->args.lock_context = req->wb_lock_context;
+ data->args.stable = NFS_UNSTABLE;
+ switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+ case 0:
+ break;
+ case FLUSH_COND_STABLE:
+ if (nfs_reqs_to_commit(cinfo))
+ break;
+ default:
+ data->args.stable = NFS_FILE_SYNC;
+ }
+
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.eof = 0;
+ data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
+}
+
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ int err;
+ err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+ if (err)
+ rpc_exit(task, err);
+}
+
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+ const struct rpc_call_ops *call_ops, int how, int flags)
+{
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = data->header->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .task = &data->task,
+ .rpc_message = &msg,
+ .callback_ops = call_ops,
+ .callback_data = data,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | flags,
+ };
+ int ret = 0;
+
+ data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+ dprintk("NFS: %5u initiated pgio call "
+ "(req %s/%llu, %u bytes @ offset %llu)\n",
+ data->task.tk_pid,
+ data->header->inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(data->header->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+ if (how & FLUSH_SYNC) {
+ ret = rpc_wait_for_completion_task(task);
+ if (ret == 0)
+ ret = task->tk_status;
+ }
+ rpc_put_task(task);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
+/**
+ * nfs_pgio_error - Clean up from a pageio error
+ * @desc: IO descriptor
+ * @hdr: pageio header
+ */
+static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ nfs_pgio_data_release(hdr->data);
+ hdr->data = NULL;
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+static void nfs_pgio_release(void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ if (data->header->rw_ops->rw_release)
+ data->header->rw_ops->rw_release(data);
+ nfs_pgio_data_release(data);
+}
+
/**
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
@@ -254,6 +695,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
const struct nfs_pgio_completion_ops *compl_ops,
+ const struct nfs_rw_ops *rw_ops,
size_t bsize,
int io_flags)
{
@@ -267,6 +709,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
desc->pg_completion_ops = compl_ops;
+ desc->pg_rw_ops = rw_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
@@ -276,6 +719,107 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
EXPORT_SYMBOL_GPL(nfs_pageio_init);
/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+static void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ struct inode *inode = data->header->inode;
+
+ dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+ task->tk_pid, task->tk_status);
+
+ if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+ return;
+ if (task->tk_status < 0)
+ nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+ else
+ data->header->rw_ops->rw_result(task, data);
+}
+
+/*
+ * Create an RPC task for the given read or write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_page *req;
+ struct page **pages;
+ struct nfs_pgio_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct nfs_commit_info cinfo;
+
+ data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data)
+ return nfs_pgio_error(desc, hdr);
+
+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+ pages = data->pages.pagevec;
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);
+ *pages++ = req->wb_page;
+ }
+
+ if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
+ desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+ /* Set up the argument struct */
+ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+ hdr->data = data;
+ desc->pg_rpc_callops = &nfs_pgio_common_ops;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pgio);
+
+static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
+{
+ struct nfs_rw_header *rw_hdr;
+ struct nfs_pgio_header *hdr;
+ int ret;
+
+ rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
+ if (!rw_hdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &rw_hdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pgio(desc, hdr);
+ if (ret == 0)
+ ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+ hdr->data, desc->pg_rpc_callops,
+ desc->pg_ioflags, 0);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ return ret;
+}
+
+static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+ const struct nfs_open_context *ctx2)
+{
+ return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
+static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
+ const struct nfs_lock_context *l2)
+{
+ return l1->lockowner.l_owner == l2->lockowner.l_owner
+ && l1->lockowner.l_pid == l2->lockowner.l_pid;
+}
+
+/**
* nfs_can_coalesce_requests - test two requests for compatibility
* @prev: pointer to nfs_page
* @req: pointer to nfs_page
@@ -290,21 +834,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
- if (req->wb_context->cred != prev->wb_context->cred)
- return false;
- if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner)
- return false;
- if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid)
- return false;
- if (req->wb_context->state != prev->wb_context->state)
- return false;
- if (req->wb_pgbase != 0)
- return false;
- if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
- return false;
- if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
- return false;
- return pgio->pg_ops->pg_test(pgio, prev, req);
+ size_t size;
+
+ if (prev) {
+ if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+ return false;
+ if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+ !nfs_match_lock_context(req->wb_lock_context,
+ prev->wb_lock_context))
+ return false;
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
+ }
+ size = pgio->pg_ops->pg_test(pgio, prev, req);
+ WARN_ON_ONCE(size > req->wb_bytes);
+ if (size && size < req->wb_bytes)
+ req->wb_bytes = size;
+ return size > 0;
}
/**
@@ -318,17 +864,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
+ struct nfs_page *prev = NULL;
if (desc->pg_count != 0) {
- struct nfs_page *prev;
-
prev = nfs_list_entry(desc->pg_list.prev);
- if (!nfs_can_coalesce_requests(prev, req, desc))
- return 0;
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
+ if (!nfs_can_coalesce_requests(prev, req, desc))
+ return 0;
nfs_list_remove_request(req);
nfs_list_add_request(req, &desc->pg_list);
desc->pg_count += req->wb_bytes;
@@ -358,22 +903,72 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* @desc: destination io descriptor
* @req: request
*
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
- while (!nfs_pageio_do_add_request(desc, req)) {
- desc->pg_moreio = 1;
- nfs_pageio_doio(desc);
- if (desc->pg_error < 0)
- return 0;
- desc->pg_moreio = 0;
- if (desc->pg_recoalesce)
- return 0;
- }
+ struct nfs_page *subreq;
+ unsigned int bytes_left = 0;
+ unsigned int offset, pgbase;
+
+ nfs_page_group_lock(req);
+
+ subreq = req;
+ bytes_left = subreq->wb_bytes;
+ offset = subreq->wb_offset;
+ pgbase = subreq->wb_pgbase;
+
+ do {
+ if (!nfs_pageio_do_add_request(desc, subreq)) {
+ /* make sure pg_test call(s) did nothing */
+ WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+ WARN_ON_ONCE(subreq->wb_offset != offset);
+ WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+ nfs_page_group_unlock(req);
+ desc->pg_moreio = 1;
+ nfs_pageio_doio(desc);
+ if (desc->pg_error < 0)
+ return 0;
+ if (desc->pg_recoalesce)
+ return 0;
+ /* retry add_request for this subreq */
+ nfs_page_group_lock(req);
+ continue;
+ }
+
+ /* check for buggy pg_test call(s) */
+ WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+ WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+ WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+ bytes_left -= subreq->wb_bytes;
+ offset += subreq->wb_bytes;
+ pgbase += subreq->wb_bytes;
+
+ if (bytes_left) {
+ subreq = nfs_create_request(req->wb_context,
+ req->wb_page,
+ subreq, pgbase, bytes_left);
+ if (IS_ERR(subreq))
+ goto err_ptr;
+ nfs_lock_request(subreq);
+ subreq->wb_offset = offset;
+ subreq->wb_index = req->wb_index;
+ }
+ } while (bytes_left > 0);
+
+ nfs_page_group_unlock(req);
return 1;
+err_ptr:
+ desc->pg_error = PTR_ERR(subreq);
+ nfs_page_group_unlock(req);
+ return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -386,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
desc->pg_count = 0;
desc->pg_base = 0;
desc->pg_recoalesce = 0;
+ desc->pg_moreio = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
@@ -472,3 +1068,13 @@ void nfs_destroy_nfspagecache(void)
kmem_cache_destroy(nfs_page_cachep);
}
+static const struct rpc_call_ops nfs_pgio_common_ops = {
+ .rpc_call_prepare = nfs_pgio_prepare,
+ .rpc_call_done = nfs_pgio_result,
+ .rpc_release = nfs_pgio_release,
+};
+
+const struct nfs_pageio_ops nfs_pgio_rw_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_pgios,
+};
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d00260b0810..6fdcd233d6f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -33,6 +33,7 @@
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
+#include "nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -360,7 +361,7 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);
-static inline u64
+static u64
end_offset(u64 start, u64 len)
{
u64 end;
@@ -376,9 +377,9 @@ end_offset(u64 start, u64 len)
* start2 end2
* [----------------)
*/
-static inline int
-lo_seg_contained(struct pnfs_layout_range *l1,
- struct pnfs_layout_range *l2)
+static bool
+pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
+ const struct pnfs_layout_range *l2)
{
u64 start1 = l1->offset;
u64 end1 = end_offset(start1, l1->length);
@@ -395,9 +396,9 @@ lo_seg_contained(struct pnfs_layout_range *l1,
* start2 end2
* [----------------)
*/
-static inline int
-lo_seg_intersecting(struct pnfs_layout_range *l1,
- struct pnfs_layout_range *l2)
+static bool
+pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
+ const struct pnfs_layout_range *l2)
{
u64 start1 = l1->offset;
u64 end1 = end_offset(start1, l1->length);
@@ -409,12 +410,22 @@ lo_seg_intersecting(struct pnfs_layout_range *l1,
}
static bool
-should_free_lseg(struct pnfs_layout_range *lseg_range,
- struct pnfs_layout_range *recall_range)
+should_free_lseg(const struct pnfs_layout_range *lseg_range,
+ const struct pnfs_layout_range *recall_range)
{
return (recall_range->iomode == IOMODE_ANY ||
lseg_range->iomode == recall_range->iomode) &&
- lo_seg_intersecting(lseg_range, recall_range);
+ pnfs_lseg_range_intersecting(lseg_range, recall_range);
+}
+
+static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ if (!atomic_dec_and_test(&lseg->pls_refcount))
+ return false;
+ pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+ list_add(&lseg->pls_list, tmp_list);
+ return true;
}
/* Returns 1 if lseg is removed from list, 0 otherwise */
@@ -430,11 +441,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
- list_add(&lseg->pls_list, tmp_list);
+ if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
- }
}
return rv;
}
@@ -505,37 +513,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
-/*
- * Called by the state manger to remove all layouts established under an
- * expired lease.
- */
-void
-pnfs_destroy_all_layouts(struct nfs_client *clp)
+static bool
+pnfs_layout_add_bulk_destroy_list(struct inode *inode,
+ struct list_head *layout_list)
{
- struct nfs_server *server;
struct pnfs_layout_hdr *lo;
- LIST_HEAD(tmp_list);
+ bool ret = false;
- nfs4_deviceid_mark_client_invalid(clp);
- nfs4_deviceid_purge_client(clp);
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
+ pnfs_get_layout_hdr(lo);
+ list_add(&lo->plh_bulk_destroy, layout_list);
+ ret = true;
+ }
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+/* Caller must hold rcu_read_lock and clp->cl_lock */
+static int
+pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
+ struct nfs_server *server,
+ struct list_head *layout_list)
+{
+ struct pnfs_layout_hdr *lo, *next;
+ struct inode *inode;
+
+ list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
+ inode = igrab(lo->plh_inode);
+ if (inode == NULL)
+ continue;
+ list_del_init(&lo->plh_layouts);
+ if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
+ continue;
+ rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
+ iput(inode);
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static int
+pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
+ bool is_bulk_recall)
+{
+ struct pnfs_layout_hdr *lo;
+ struct inode *inode;
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+ LIST_HEAD(lseg_list);
+ int ret = 0;
+
+ while (!list_empty(layout_list)) {
+ lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
+ plh_bulk_destroy);
+ dprintk("%s freeing layout for inode %lu\n", __func__,
+ lo->plh_inode->i_ino);
+ inode = lo->plh_inode;
+ spin_lock(&inode->i_lock);
+ list_del_init(&lo->plh_bulk_destroy);
+ lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
+ if (is_bulk_recall)
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+ ret = -EAGAIN;
+ spin_unlock(&inode->i_lock);
+ pnfs_free_lseg_list(&lseg_list);
+ pnfs_put_layout_hdr(lo);
+ iput(inode);
+ }
+ return ret;
+}
+
+int
+pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
+ struct nfs_fsid *fsid,
+ bool is_recall)
+{
+ struct nfs_server *server;
+ LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
+restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
- if (!list_empty(&server->layouts))
- list_splice_init(&server->layouts, &tmp_list);
+ if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
+ continue;
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp,
+ server,
+ &layout_list) != 0)
+ goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- while (!list_empty(&tmp_list)) {
- lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
- plh_layouts);
- dprintk("%s freeing layout for inode %lu\n", __func__,
- lo->plh_inode->i_ino);
- list_del_init(&lo->plh_layouts);
- pnfs_destroy_layout(NFS_I(lo->plh_inode));
+ if (list_empty(&layout_list))
+ return 0;
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+}
+
+int
+pnfs_destroy_layouts_byclid(struct nfs_client *clp,
+ bool is_recall)
+{
+ struct nfs_server *server;
+ LIST_HEAD(layout_list);
+
+ spin_lock(&clp->cl_lock);
+ rcu_read_lock();
+restart:
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ if (pnfs_layout_bulk_destroy_byserver_locked(clp,
+ server,
+ &layout_list) != 0)
+ goto restart;
}
+ rcu_read_unlock();
+ spin_unlock(&clp->cl_lock);
+
+ if (list_empty(&layout_list))
+ return 0;
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+}
+
+/*
+ * Called by the state manager to remove all layouts established under an
+ * expired lease.
+ */
+void
+pnfs_destroy_all_layouts(struct nfs_client *clp)
+{
+ nfs4_deviceid_mark_client_invalid(clp);
+ nfs4_deviceid_purge_client(clp);
+
+ pnfs_destroy_layouts_byclid(clp, false);
}
/*
@@ -544,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
*/
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
- return (s32)s1 - (s32)s2 > 0;
+ return (s32)(s1 - s2) > 0;
+}
+
+static void
+pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new,
+ struct list_head *free_me_list)
+{
+ if (nfs4_stateid_match_other(&lo->plh_stateid, new))
+ return;
+ /* Layout is new! Kill existing layout segments */
+ pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
}
/* update lo->plh_stateid with new if is more recent */
@@ -601,6 +730,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
spin_lock(&lo->plh_inode->i_lock);
if (pnfs_layoutgets_blocked(lo, 1)) {
status = -EAGAIN;
+ } else if (!nfs4_valid_open_stateid(open_state)) {
+ status = -EBADF;
} else if (list_empty(&lo->plh_segs)) {
int seq;
@@ -647,6 +778,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
lgp->gfp_flags = gfp_flags;
+ lgp->cred = lo->plh_lc_cred;
/* Synchronously retrieve layout information from server and
* store in lseg.
@@ -667,6 +799,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}
+static void pnfs_clear_layoutcommit(struct inode *inode,
+ struct list_head *head)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ return;
+ list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+ if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ continue;
+ pnfs_lseg_dec_and_remove_zero(lseg, head);
+ }
+}
+
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
@@ -698,6 +845,7 @@ _pnfs_return_layout(struct inode *ino)
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
+ pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
@@ -710,8 +858,6 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
- WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
-
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
@@ -727,6 +873,7 @@ _pnfs_return_layout(struct inode *ino)
lrp->args.inode = ino;
lrp->args.layout = lo;
lrp->clp = NFS_SERVER(ino)->nfs_client;
+ lrp->cred = lo->plh_lc_cred;
status = nfs4_proc_layoutreturn(lrp);
out:
@@ -735,6 +882,33 @@ out:
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);
+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ int ret;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo == NULL) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ pnfs_get_layout_hdr(lo);
+ /* Block new layoutgets and read/write to ds */
+ lo->plh_block_lgets++;
+ spin_unlock(&inode->i_lock);
+ filemap_fdatawait(inode->i_mapping);
+ ret = pnfs_layoutcommit_inode(inode, true);
+ if (ret == 0)
+ ret = _pnfs_return_layout(inode);
+ spin_lock(&inode->i_lock);
+ lo->plh_block_lgets--;
+ spin_unlock(&inode->i_lock);
+ pnfs_put_layout_hdr(lo);
+ return ret;
+}
+
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
@@ -824,8 +998,8 @@ out:
* are seen first.
*/
static s64
-cmp_layout(struct pnfs_layout_range *l1,
- struct pnfs_layout_range *l2)
+pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
+ const struct pnfs_layout_range *l2)
{
s64 d;
@@ -852,7 +1026,7 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin\n", __func__);
list_for_each_entry(lp, &lo->plh_segs, pls_list) {
- if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
+ if (pnfs_lseg_range_cmp(&lseg->pls_range, &lp->pls_range) > 0)
continue;
list_add_tail(&lseg->pls_list, &lp->pls_list);
dprintk("%s: inserted lseg %p "
@@ -888,9 +1062,9 @@ alloc_init_layout_hdr(struct inode *ino,
atomic_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->plh_layouts);
INIT_LIST_HEAD(&lo->plh_segs);
- INIT_LIST_HEAD(&lo->plh_bulk_recall);
+ INIT_LIST_HEAD(&lo->plh_bulk_destroy);
lo->plh_inode = ino;
- lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
+ lo->plh_lc_cred = get_rpccred(ctx->cred);
return lo;
}
@@ -931,21 +1105,21 @@ out_existing:
* READ READ true
* READ RW true
*/
-static int
-is_matching_lseg(struct pnfs_layout_range *ls_range,
- struct pnfs_layout_range *range)
+static bool
+pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
+ const struct pnfs_layout_range *range)
{
struct pnfs_layout_range range1;
if ((range->iomode == IOMODE_RW &&
ls_range->iomode != IOMODE_RW) ||
- !lo_seg_intersecting(ls_range, range))
+ !pnfs_lseg_range_intersecting(ls_range, range))
return 0;
/* range1 covers only the first byte in the range */
range1 = *range;
range1.length = 1;
- return lo_seg_contained(ls_range, &range1);
+ return pnfs_lseg_range_contained(ls_range, &range1);
}
/*
@@ -961,7 +1135,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
- is_matching_lseg(&lseg->pls_range, range)) {
+ pnfs_lseg_range_match(&lseg->pls_range, range)) {
ret = pnfs_get_lseg(lseg);
break;
}
@@ -1071,7 +1245,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
- bool first = false;
+ bool first;
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
goto out;
@@ -1105,10 +1279,9 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
atomic_inc(&lo->plh_outstanding);
- if (list_empty(&lo->plh_segs))
- first = true;
-
+ first = list_empty(&lo->plh_layouts) ? true : false;
spin_unlock(&ino->i_lock);
+
if (first) {
/* The lo must be on the clp list if there is any
* chance of a CB_LAYOUTRECALL(FILE) coming in.
@@ -1153,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode;
+ LIST_HEAD(free_me);
int status = 0;
/* Inject layout blob into I/O device driver */
@@ -1179,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget_reply;
}
+ /* Check that the new stateid matches the old stateid */
+ pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);
@@ -1193,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me);
return lseg;
out:
return ERR_PTR(status);
@@ -1211,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
-
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
@@ -1240,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
{
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_write_mds(pgio);
- return;
- }
-
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1257,68 +1424,63 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_read(pgio, inode, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (pgio->pg_lseg == NULL)
- return nfs_generic_pg_test(pgio, prev, req);
+ unsigned int size;
+ u64 seg_end, req_start, seg_left;
+
+ size = nfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
/*
- * Test if a nfs_page is fully contained in the pnfs_layout_range.
- * Note that this test makes several assumptions:
- * - that the previous nfs_page in the struct nfs_pageio_descriptor
- * is known to lie within the range.
- * - that the nfs_page being tested is known to be contiguous with the
- * previous nfs_page.
- * - Layout ranges are page aligned, so we only have to test the
- * start offset of the request.
+ * 'size' contains the number of bytes left in the current page (up
+ * to the original size asked for in @req->wb_bytes).
+ *
+ * Calculate how many bytes are left in the layout segment
+ * and if there are fewer bytes than 'size', return that instead.
*
* Please also note that 'end_offset' is actually the offset of the
* first byte that lies outside the pnfs_layout_range. FIXME?
*
*/
- return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length);
+ if (pgio->pg_lseg) {
+ seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
+ pgio->pg_lseg->pls_range.length);
+ req_start = req_offset(req);
+ WARN_ON_ONCE(req_start > seg_end);
+ /* start of request is past the last byte of this segment */
+ if (req_start >= seg_end)
+ return 0;
+
+ /* adjust 'size' iff there are fewer bytes left in the
+ * segment than what nfs_generic_pg_test returned */
+ seg_left = seg_end - req_start;
+ if (seg_left < size)
+ size = (unsigned int)seg_left;
+ }
+
+ return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
int pnfs_write_done_resend_to_mds(struct inode *inode,
struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops)
+ const struct nfs_pgio_completion_ops *compl_ops,
+ struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+ nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
+ pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1340,29 +1502,30 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
&hdr->pages,
- hdr->completion_ops);
+ hdr->completion_ops,
+ hdr->dreq);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
+ trace_nfs4_pnfs_write(data, hdr->pnfs_error);
if (!hdr->pnfs_error) {
pnfs_set_layoutcommit(data);
hdr->mds_ops->rpc_call_done(&data->task, data);
@@ -1374,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1383,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_writedata_release(data);
+ nfs_pgio_data_release(data);
}
static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
@@ -1409,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
}
static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr, int how)
{
- struct nfs_write_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_write_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_writehdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_write_header *whdr;
+ struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
- whdr = nfs_writehdr_alloc();
+ whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!whdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg);
@@ -1454,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+ pnfs_do_write(desc, hdr, desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
@@ -1468,13 +1626,15 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
int pnfs_read_done_resend_to_mds(struct inode *inode,
struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops)
+ const struct nfs_pgio_completion_ops *compl_ops,
+ struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read(&pgio, inode, compl_ops);
+ nfs_pageio_init_read(&pgio, inode, true, compl_ops);
+ pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1492,29 +1652,30 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
&hdr->pages,
- hdr->completion_ops);
+ hdr->completion_ops,
+ hdr->dreq);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
+ trace_nfs4_pnfs_read(data, hdr->pnfs_error);
if (likely(!hdr->pnfs_error)) {
__nfs4_read_done_cb(data);
hdr->mds_ops->rpc_call_done(&data->task, data);
@@ -1526,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1535,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_readdata_release(data);
+ nfs_pgio_data_release(data);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
@@ -1564,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
}
static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
- struct nfs_read_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_read_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_readhdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_read_header *rhdr;
+ struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr;
int ret;
- rhdr = nfs_readhdr_alloc();
+ rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!rhdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
@@ -1610,18 +1765,27 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+ pnfs_do_read(desc, hdr);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+static void pnfs_clear_layoutcommitting(struct inode *inode)
+{
+ unsigned long *bitlock = &NFS_I(inode)->flags;
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_atomic();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
/*
* There can be multiple RW segments.
*/
@@ -1631,11 +1795,24 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
if (lseg->pls_range.iomode == IOMODE_RW &&
- test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
list_add(&lseg->pls_lc_list, listp);
}
}
+static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ /* Matched by references in pnfs_set_layoutcommit */
+ list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+ list_del_init(&lseg->pls_lc_list);
+ pnfs_put_lseg(lseg);
+ }
+
+ pnfs_clear_layoutcommitting(inode);
+}
+
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1643,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
@@ -1680,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+ pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}
/*
@@ -1696,43 +1874,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
struct nfs4_layoutcommit_data *data;
struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos;
- int status = 0;
+ int status;
- dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
-
- if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ if (!pnfs_layoutcommit_outstanding(inode))
return 0;
- /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
- data = kzalloc(sizeof(*data), GFP_NOFS);
- if (!data) {
- status = -ENOMEM;
- goto out;
- }
-
- if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
- goto out_free;
+ dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
+ status = -EAGAIN;
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
- if (!sync) {
- status = -EAGAIN;
- goto out_free;
- }
- status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ if (!sync)
+ goto out;
+ status = wait_on_bit_lock(&nfsi->flags,
+ NFS_INO_LAYOUTCOMMITTING,
+ nfs_wait_bit_killable,
+ TASK_KILLABLE);
if (status)
- goto out_free;
+ goto out;
}
- INIT_LIST_HEAD(&data->lseg_list);
+ status = -ENOMEM;
+ /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
+ data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (!data)
+ goto clear_layoutcommitting;
+
+ status = 0;
spin_lock(&inode->i_lock);
- if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
- clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
- spin_unlock(&inode->i_lock);
- wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
- goto out_free;
- }
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ goto out_unlock;
+ INIT_LIST_HEAD(&data->lseg_list);
pnfs_list_write_lseg(inode, &data->lseg_list);
end_pos = nfsi->layout->plh_lwb;
@@ -1755,8 +1927,11 @@ out:
mark_inode_dirty_sync(inode);
dprintk("<-- %s status %d\n", __func__, status);
return status;
-out_free:
+out_unlock:
+ spin_unlock(&inode->i_lock);
kfree(data);
+clear_layoutcommitting:
+ pnfs_clear_layoutcommitting(inode);
goto out;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da..4fb309a2b4c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
- enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
- enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+ enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+ enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
struct pnfs_layout_hdr {
atomic_t plh_refcount;
struct list_head plh_layouts; /* other client layouts */
- struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
+ struct list_head plh_bulk_destroy;
struct list_head plh_segs; /* layout segments list */
nfs4_stateid plh_stateid;
atomic_t plh_outstanding; /* number of RPCs out */
@@ -149,9 +149,10 @@ struct pnfs_device {
struct nfs4_deviceid dev_id;
unsigned int layout_type;
unsigned int mincount;
+ unsigned int maxcount; /* gdia_maxcount */
struct page **pages;
unsigned int pgbase;
- unsigned int pglen;
+ unsigned int pglen; /* reply buffer length */
};
#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
@@ -170,7 +171,8 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
const struct nfs_fh *fh,
struct pnfs_devicelist *devlist);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
- struct pnfs_device *dev);
+ struct pnfs_device *dev,
+ struct rpc_cred *cred);
extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
@@ -178,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
- const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
- int, const struct nfs_pgio_completion_ops *);
-
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -190,12 +187,18 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
+int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
+ struct nfs_fsid *fsid,
+ bool is_recall);
+int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
+ bool is_recall);
void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
@@ -210,12 +213,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+int pnfs_commit_and_return_layout(struct inode *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -225,9 +229,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops);
+ const struct nfs_pgio_completion_ops *compl_ops,
+ struct nfs_direct_req *dreq);
int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops);
+ const struct nfs_pgio_completion_ops *compl_ops,
+ struct nfs_direct_req *dreq);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
@@ -265,7 +271,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
atomic_inc(&lseg->pls_refcount);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
}
return lseg;
}
@@ -349,6 +355,15 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode)
PNFS_LAYOUTRET_ON_SETATTR;
}
+static inline bool
+pnfs_layoutcommit_outstanding(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags) != 0 ||
+ test_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags) != 0;
+}
+
static inline int pnfs_return_layout(struct inode *ino)
{
struct nfs_inode *nfsi = NFS_I(ino);
@@ -400,6 +415,11 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+ return 0;
+}
+
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
@@ -437,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
static inline int
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
@@ -500,6 +508,13 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
return false;
}
+static inline bool
+pnfs_layoutcommit_outstanding(struct inode *inode)
+{
+ return false;
+}
+
+
static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
return NULL;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index d35b62e83ea..6da209bd940 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->ld == ld && d->nfs_client == clp &&
!memcmp(&d->deviceid, id, sizeof(*id))) {
if (atomic_read(&d->ref))
@@ -248,12 +247,11 @@ static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
HLIST_HEAD(tmp);
spin_lock(&nfs4_deviceid_lock);
rcu_read_lock();
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
if (d->nfs_client == clp && atomic_read(&d->ref)) {
hlist_del_init_rcu(&d->node);
hlist_add_head(&d->tmpnode, &tmp);
@@ -291,12 +289,11 @@ void
nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
{
struct nfs4_deviceid_node *d;
- struct hlist_node *n;
int i;
rcu_read_lock();
for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
- hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+ hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
if (d->nfs_client == clp)
set_bit(NFS_DEVICEID_INVALID, &d->flags);
}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f084dac948e..c171ce1a8a3 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -98,7 +98,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
*/
static int
nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
@@ -146,7 +146,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
static int
nfs_proc_lookup(struct inode *dir, struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+ struct nfs4_label *label)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
@@ -234,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
};
int status = -ENOMEM;
- dprintk("NFS call create %s\n", dentry->d_name.name);
+ dprintk("NFS call create %pd\n", dentry);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
@@ -243,7 +244,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply create: %d\n", status);
@@ -264,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
umode_t mode;
int status = -ENOMEM;
- dprintk("NFS call mknod %s\n", dentry->d_name.name);
+ dprintk("NFS call mknod %pd\n", dentry);
mode = sattr->ia_mode;
if (S_ISFIFO(mode)) {
@@ -290,7 +291,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
}
if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply mknod: %d\n", status);
@@ -356,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
}
static int
-nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
- struct inode *new_dir, struct qstr *new_name)
-{
- struct nfs_renameargs arg = {
- .old_dir = NFS_FH(old_dir),
- .old_name = old_name,
- .new_dir = NFS_FH(new_dir),
- .new_name = new_name,
- };
- struct rpc_message msg = {
- .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
- .rpc_argp = &arg,
- };
- int status;
-
- dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
- status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
- nfs_mark_for_revalidate(old_dir);
- nfs_mark_for_revalidate(new_dir);
- dprintk("NFS reply rename: %d\n", status);
- return status;
-}
-
-static int
nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
{
struct nfs_linkargs arg = {
@@ -422,7 +399,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
};
int status = -ENAMETOOLONG;
- dprintk("NFS call symlink %s\n", dentry->d_name.name);
+ dprintk("NFS call symlink %pd\n", dentry);
if (len > NFS2_MAXPATHLEN)
goto out;
@@ -442,7 +419,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
* should fill in the data with a LOOKUP call on the wire.
*/
if (status == 0)
- status = nfs_instantiate(dentry, fh, fattr);
+ status = nfs_instantiate(dentry, fh, fattr, NULL);
out_free:
nfs_free_fattr(fattr);
@@ -461,7 +438,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
};
int status = -ENOMEM;
- dprintk("NFS call mkdir %s\n", dentry->d_name.name);
+ dprintk("NFS call mkdir %pd\n", dentry);
data = nfs_alloc_createdata(dir, dentry, sattr);
if (data == NULL)
goto out;
@@ -471,7 +448,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
nfs_mark_for_revalidate(dir);
if (status == 0)
- status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
+ status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
nfs_free_createdata(data);
out:
dprintk("NFS reply mkdir: %d\n", status);
@@ -601,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0;
}
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -617,17 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
-static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
+ return 0;
}
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -636,18 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
-static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
-}
-
static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
BUG();
@@ -662,7 +635,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
static int
nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- struct inode *inode = filp->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(filp);
return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
@@ -742,7 +715,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.unlink_setup = nfs_proc_unlink_setup,
.unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
.unlink_done = nfs_proc_unlink_done,
- .rename = nfs_proc_rename,
.rename_setup = nfs_proc_rename_setup,
.rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
.rename_done = nfs_proc_rename_done,
@@ -756,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs2_decode_dirent,
+ .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
.read_setup = nfs_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs_proc_read_rpc_prepare,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a5e5d9899d5..e818a475ca6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,85 +24,24 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
-struct nfs_read_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
{
- struct nfs_read_header *rhdr;
-
- rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
- if (rhdr) {
- struct nfs_pgio_header *hdr = &rhdr->header;
-
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- }
- return rhdr;
+ return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
{
- struct nfs_read_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
-{
- struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
-
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
-
-void nfs_readdata_release(struct nfs_read_data *rdata)
-{
- struct nfs_pgio_header *hdr = rdata->header;
- struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
-
- put_nfs_open_context(rdata->args.context);
- if (rdata->pages.pagevec != rdata->pages.page_array)
- kfree(rdata->pages.pagevec);
- if (rdata == &read_header->rpc_data) {
- rdata->header = NULL;
- rdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(rdata);
-}
-EXPORT_SYMBOL_GPL(nfs_readdata_release);
static
int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page)
}
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
- NFS_SERVER(inode)->rsize, 0);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+ server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, inode, page, 0, len);
+ new = nfs_create_request(ctx, page, NULL, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,20 +105,31 @@ static void nfs_readpage_release(struct nfs_page *req)
{
struct inode *d_inode = req->wb_context->dentry->d_inode;
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+ (long long)req_offset(req));
- unlock_page(req->wb_page);
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
- dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+ unlock_page(req->wb_page);
+ }
+
+ dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
}
-/* Note io was page aligned */
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{ /* Marks req->wb_page uptodate, but only if the PG_UPTODATE page-group sync below succeeds. */
+ if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) /* NOTE(review): presumably true only once the whole page group has set the bit - confirm against the helper */
+ SetPageUptodate(req->wb_page);
+}
+
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
+ unsigned long start = req->wb_pgbase;
+ unsigned long end = req->wb_pgbase + req->wb_bytes;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
- if (bytes > hdr->good_bytes)
- zero_user(page, 0, PAGE_SIZE);
- else if (hdr->good_bytes - bytes < PAGE_SIZE)
- zero_user_segment(page,
- hdr->good_bytes & ~PAGE_MASK,
- PAGE_SIZE);
+ /* note: regions of the page not covered by a
+ * request are zeroed in nfs_readpage_async /
+ * readpage_async_filler */
+ if (bytes > hdr->good_bytes) {
+ /* nothing in this request was good, so zero
+ * the full extent of the request */
+ zero_user_segment(page, start, end);
+
+ } else if (hdr->good_bytes - bytes < req->wb_bytes) {
+ /* part of this request has good bytes, but
+ * not all. zero the bad bytes */
+ start += hdr->good_bytes - bytes;
+ WARN_ON(start < req->wb_pgbase);
+ zero_user_segment(page, start, end);
+ }
}
bytes += req->wb_bytes;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (bytes <= hdr->good_bytes)
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
} else
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
@@ -203,95 +172,14 @@ out:
hdr->release(hdr);
}
-int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = clnt,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | swap_flags | flags,
- };
-
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
- "offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- rpc_put_task(task);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
-/*
- * Set up the NFS read request struct
- */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
- unsigned int count, unsigned int offset)
-{
- struct nfs_page *req = data->header->req;
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.eof = 0;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_read(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
-}
-
-static int
-nfs_do_multiple_reads(struct list_head *head,
- const struct rpc_call_ops *call_ops)
-{
- struct nfs_read_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_read(data, call_ops);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ task_setup_data->flags |= swap_flags;
+ NFS_PROTO(inode)->read_setup(data, msg);
}
static void
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
.completion = nfs_read_completion,
};
-static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_read_data, list);
- list_del(&data->list);
- nfs_readdata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple requests to fill a single page.
- *
- * We optimize to reduce the number of read operations on the wire. If we
- * detect that we're reading a page, or an area of a page, that is past the
- * end of file, we do not generate NFS read operations but just clear the
- * parts of the page that would have come back zero from the server anyway.
- *
- * We rely on the cached value of i_size to make this determination; another
- * client can fill pages on the server past our cached end-of-file, but we
- * won't see the new data until our attribute cache is updated. This is more
- * or less conventional NFS client behavior.
- */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_read_data *data;
- size_t rsize = desc->pg_bsize, nbytes;
- unsigned int offset;
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes,rsize);
-
- data = nfs_readdata_alloc(hdr, 1);
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_read_rpcsetup(data, len, offset);
- list_add(&data->list, &hdr->rpc_list);
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
-
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_read_data *data;
- struct list_head *head = &desc->pg_list;
-
- data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
-
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- nfs_read_rpcsetup(data, desc->pg_count, 0);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc, hdr);
- return nfs_pagein_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_pagein);
-
-static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_read_header *rhdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- rhdr = nfs_readhdr_alloc();
- if (!rhdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &rhdr->header;
- nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&hdr->rpc_list,
- desc->pg_rpc_callops);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_read_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
-};
-
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct inode *inode = data->header->inode;
- int status;
-
- dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
- task->tk_status);
-
- status = NFS_PROTO(inode)->read_done(task, data);
+ int status = NFS_PROTO(inode)->read_done(task, data);
if (status != 0)
return status;
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_readargs *argp = &data->args;
- struct nfs_readres *resp = &data->res;
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
/* This is a short read! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
rpc_restart_call_prepare(task);
}
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
- /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
- if (nfs_readpage_result(task, data) != 0)
- return;
- if (task->tk_status < 0)
- nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
- else if (data->res.eof) {
+ if (data->res.eof) {
loff_t bound;
bound = data->args.offset + data->res.count;
@@ -505,23 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
nfs_readpage_retry(task, data);
}
-static void nfs_readpage_release_common(void *calldata)
-{
- nfs_readdata_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_read_data *data = calldata;
- NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
-}
-
-static const struct rpc_call_ops nfs_read_common_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_common,
- .rpc_release = nfs_readpage_release_common,
-};
-
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
@@ -589,7 +325,6 @@ static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *new;
unsigned int len;
int error;
@@ -598,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(desc->ctx, inode, page, 0, len);
+ new = nfs_create_request(desc->ctx, page, NULL, 0, len);
if (IS_ERR(new))
goto out_error;
@@ -627,9 +362,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
unsigned long npages;
int ret = -ESTALE;
- dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
+ dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
+ (unsigned long long)NFS_FILEID(inode),
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
@@ -651,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
@@ -668,7 +404,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_read_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
@@ -681,3 +417,12 @@ void nfs_destroy_readpagecache(void)
{
kmem_cache_destroy(nfs_rdata_cachep);
}
+
+static const struct nfs_rw_ops nfs_rw_read_ops = { /* read-side ops; nfs_pageio_init_read() passes this table to nfs_pageio_init() */
+ .rw_mode = FMODE_READ,
+ .rw_alloc_header = nfs_readhdr_alloc, /* zeroed header from nfs_rdata_cachep (GFP_KERNEL) */
+ .rw_free_header = nfs_readhdr_free, /* hands the header back to nfs_rdata_cachep */
+ .rw_done = nfs_readpage_done, /* begins by calling the protocol's ->read_done(); a non-zero status is returned as-is */
+ .rw_result = nfs_readpage_result, /* inspects res.eof and may retry via nfs_readpage_retry() */
+ .rw_initiate = nfs_initiate_read, /* ORs NFS_RPC_SWAPFLAGS into the task flags for swapfiles, then calls ->read_setup() */
+};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5..084af1060d7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -31,6 +31,7 @@
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/metrics.h>
#include <linux/sunrpc/xprtsock.h>
@@ -54,7 +55,6 @@
#include <linux/parser.h>
#include <linux/nsproxy.h>
#include <linux/rcupdate.h>
-#include <linux/kthread.h>
#include <asm/uaccess.h>
@@ -269,7 +269,7 @@ static match_table_t nfs_local_lock_tokens = {
enum {
Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
- Opt_vers_4_1,
+ Opt_vers_4_1, Opt_vers_4_2,
Opt_vers_err
};
@@ -280,6 +280,7 @@ static match_table_t nfs_vers_tokens = {
{ Opt_vers_4, "4" },
{ Opt_vers_4_0, "4.0" },
{ Opt_vers_4_1, "4.1" },
+ { Opt_vers_4_2, "4.2" },
{ Opt_vers_err, NULL }
};
@@ -292,8 +293,9 @@ struct file_system_type nfs_fs_type = {
.name = "nfs",
.mount = nfs_fs_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
+MODULE_ALIAS_FS("nfs");
EXPORT_SYMBOL_GPL(nfs_fs_type);
struct file_system_type nfs_xdev_fs_type = {
@@ -301,7 +303,7 @@ struct file_system_type nfs_xdev_fs_type = {
.name = "nfs",
.mount = nfs_xdev_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
const struct super_operations nfs_sops = {
@@ -331,8 +333,10 @@ struct file_system_type nfs4_fs_type = {
.name = "nfs4",
.mount = nfs_fs_mount,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
};
+MODULE_ALIAS_FS("nfs4");
+MODULE_ALIAS("nfs4");
EXPORT_SYMBOL_GPL(nfs4_fs_type);
static int __init register_nfs4_fs(void)
@@ -356,7 +360,8 @@ static void unregister_nfs4_fs(void)
#endif
static struct shrinker acl_shrinker = {
- .shrink = nfs_access_cache_shrinker,
+ .count_objects = nfs_access_cache_count,
+ .scan_objects = nfs_access_cache_scan,
.seeks = DEFAULT_SEEKS,
};
@@ -418,54 +423,6 @@ void nfs_sb_deactive(struct super_block *sb)
}
EXPORT_SYMBOL_GPL(nfs_sb_deactive);
-static int nfs_deactivate_super_async_work(void *ptr)
-{
- struct super_block *sb = ptr;
-
- deactivate_super(sb);
- module_put_and_exit(0);
- return 0;
-}
-
-/*
- * same effect as deactivate_super, but will do final unmount in kthread
- * context
- */
-static void nfs_deactivate_super_async(struct super_block *sb)
-{
- struct task_struct *task;
- char buf[INET6_ADDRSTRLEN + 1];
- struct nfs_server *server = NFS_SB(sb);
- struct nfs_client *clp = server->nfs_client;
-
- if (!atomic_add_unless(&sb->s_active, -1, 1)) {
- rcu_read_lock();
- snprintf(buf, sizeof(buf),
- rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
- rcu_read_unlock();
-
- __module_get(THIS_MODULE);
- task = kthread_run(nfs_deactivate_super_async_work, sb,
- "%s-deactivate-super", buf);
- if (IS_ERR(task)) {
- pr_err("%s: kthread_run: %ld\n",
- __func__, PTR_ERR(task));
- /* make synchronous call and hope for the best */
- deactivate_super(sb);
- module_put(THIS_MODULE);
- }
- }
-}
-
-void nfs_sb_deactive_async(struct super_block *sb)
-{
- struct nfs_server *server = NFS_SB(sb);
-
- if (atomic_dec_and_test(&server->active))
- nfs_deactivate_super_async(sb);
-}
-EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
-
/*
* Deliver file system statistics to userspace
*/
@@ -540,7 +497,8 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
static const struct {
rpc_authflavor_t flavour;
const char *str;
- } sec_flavours[] = {
+ } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = {
+ /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */
{ RPC_AUTH_NULL, "null" },
{ RPC_AUTH_UNIX, "sys" },
{ RPC_AUTH_GSS_KRB5, "krb5" },
@@ -877,6 +835,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
seq_printf(m, "\n\tnfsv4:\t");
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]);
seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
show_sessions(m, nfss);
show_pnfs(m, nfss);
@@ -965,8 +924,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
data->mount_server.port = NFS_UNSPEC_PORT;
data->nfs_server.port = NFS_UNSPEC_PORT;
data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- data->auth_flavors[0] = RPC_AUTH_UNIX;
- data->auth_flavor_len = 1;
+ data->selected_flavor = RPC_AUTH_MAXFLAVOR;
data->minorversion = 0;
data->need_mount = true;
data->net = current->nsproxy->net_ns;
@@ -1062,55 +1020,108 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
}
/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ *
+ * 'auth_info' is modified in place: on a successful append the flavor is
+ * stored at flavors[flavor_len] and flavor_len is incremented.
+ *
+ * Returns true if 'flavor' ends up in the list (it was already there or
+ * was appended), false if it was rejected for lack of room; the list is
+ * left untouched in that case.
+ *
+ * NOTE(review): the room check compares flavor_len + 1 against the array
+ * size, so the add is refused while one slot is still free and at most
+ * (array size - 1) flavors are ever stored. Confirm this is intended.
+ */
+static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
+ rpc_authflavor_t flavor)
+{
+ unsigned int i;
+ unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
+ sizeof(auth_info->flavors[0])); /* number of slots in flavors[] */
+
+ /* make sure this flavor isn't already in the list */
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (flavor == auth_info->flavors[i])
+ return true; /* duplicate: report success without storing it twice */
+ }
+
+ if (auth_info->flavor_len + 1 >= max_flavor_len) {
+ dfprintk(MOUNT, "NFS: too many sec= flavors\n");
+ return false;
+ }
+
+ auth_info->flavors[auth_info->flavor_len++] = flavor;
+ return true;
+}
+
+/*
+ * Test whether 'match' is acceptable according to 'auth_info'.
+ *
+ * Only flavors[0] .. flavors[flavor_len - 1] are examined; 'auth_info'
+ * is not modified.
+ *
+ * Return true if 'match' is in auth_info or auth_info is empty (an empty
+ * list therefore matches every flavor).
+ * Return false otherwise.
+ */
+bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
+ rpc_authflavor_t match)
+{
+ int i; /* NOTE(review): signed, while nfs_auth_info_add() walks the same array with an unsigned int - confirm against flavor_len's declared type */
+
+ if (!auth_info->flavor_len)
+ return true;
+
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (auth_info->flavors[i] == match)
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(nfs_auth_info_match);
+
+/*
* Parse the value of the 'sec=' option.
*/
static int nfs_parse_security_flavors(char *value,
struct nfs_parsed_mount_data *mnt)
{
substring_t args[MAX_OPT_ARGS];
+ rpc_authflavor_t pseudoflavor;
+ char *p;
dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
- switch (match_token(value, nfs_secflavor_tokens, args)) {
- case Opt_sec_none:
- mnt->auth_flavors[0] = RPC_AUTH_NULL;
- break;
- case Opt_sec_sys:
- mnt->auth_flavors[0] = RPC_AUTH_UNIX;
- break;
- case Opt_sec_krb5:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
- break;
- case Opt_sec_krb5i:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
- break;
- case Opt_sec_krb5p:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
- break;
- case Opt_sec_lkey:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
- break;
- case Opt_sec_lkeyi:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
- break;
- case Opt_sec_lkeyp:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
- break;
- case Opt_sec_spkm:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
- break;
- case Opt_sec_spkmi:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
- break;
- case Opt_sec_spkmp:
- mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
- break;
- default:
- return 0;
+ while ((p = strsep(&value, ":")) != NULL) {
+ switch (match_token(p, nfs_secflavor_tokens, args)) {
+ case Opt_sec_none:
+ pseudoflavor = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ pseudoflavor = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ pseudoflavor = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ pseudoflavor = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ pseudoflavor = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ pseudoflavor = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ pseudoflavor = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ pseudoflavor = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ pseudoflavor = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ pseudoflavor = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ pseudoflavor = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ dfprintk(MOUNT,
+ "NFS: sec= option '%s' not recognized\n", p);
+ return 0;
+ }
+
+ if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
+ return 0;
}
- mnt->flags |= NFS_MOUNT_SECFLAVOUR;
- mnt->auth_flavor_len = 1;
return 1;
}
@@ -1142,6 +1153,10 @@ static int nfs_parse_version_string(char *string,
mnt->version = 4;
mnt->minorversion = 1;
break;
+ case Opt_vers_4_2:
+ mnt->version = 4;
+ mnt->minorversion = 2;
+ break;
default:
return 0;
}
@@ -1599,7 +1614,7 @@ static int nfs_parse_mount_options(char *raw,
goto out_minorversion_mismatch;
if (mnt->options & NFS_OPTION_MIGRATION &&
- mnt->version != 4 && mnt->minorversion != 0)
+ (mnt->version != 4 || mnt->minorversion != 0))
goto out_migration_misuse;
/*
@@ -1653,49 +1668,40 @@ out_security_failure:
}
/*
- * Match the requested auth flavors with the list returned by
- * the server. Returns zero and sets the mount's authentication
- * flavor on success; returns -EACCES if server does not support
- * the requested flavor.
+ * Ensure that a specified authtype in args->auth_info is supported by
+ * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * -EACCES if not.
*/
-static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
- struct nfs_mount_request *request)
+static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
+ rpc_authflavor_t *server_authlist, unsigned int count)
{
- unsigned int i, j, server_authlist_len = *(request->auth_flav_len);
+ rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
+ unsigned int i;
/*
- * Certain releases of Linux's mountd return an empty
- * flavor list. To prevent behavioral regression with
- * these servers (ie. rejecting mounts that used to
- * succeed), revert to pre-2.6.32 behavior (no checking)
- * if the returned flavor list is empty.
- */
- if (server_authlist_len == 0)
- return 0;
-
- /*
- * We avoid sophisticated negotiating here, as there are
- * plenty of cases where we can get it wrong, providing
- * either too little or too much security.
+ * If the sec= mount option is used, the specified flavor or AUTH_NULL
+ * must be in the list returned by the server.
*
- * RFC 2623, section 2.7 suggests we SHOULD prefer the
- * flavor listed first. However, some servers list
- * AUTH_NULL first. Our caller plants AUTH_SYS, the
- * preferred default, in args->auth_flavors[0] if user
- * didn't specify sec= mount option.
+ * AUTH_NULL has a special meaning when it's in the server list - it
+ * means that the server will ignore the rpc creds, so any flavor
+ * can be used.
*/
- for (i = 0; i < args->auth_flavor_len; i++)
- for (j = 0; j < server_authlist_len; j++)
- if (args->auth_flavors[i] == request->auth_flavs[j]) {
- dfprintk(MOUNT, "NFS: using auth flavor %d\n",
- request->auth_flavs[j]);
- args->auth_flavors[0] = request->auth_flavs[j];
- return 0;
- }
+ for (i = 0; i < count; i++) {
+ flavor = server_authlist[i];
+
+ if (nfs_auth_info_match(&args->auth_info, flavor) ||
+ flavor == RPC_AUTH_NULL)
+ goto out;
+ }
- dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n");
- nfs_umount(request);
+ dfprintk(MOUNT,
+ "NFS: specified auth flavors not supported by server\n");
return -EACCES;
+
+out:
+ args->selected_flavor = flavor;
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
+ return 0;
}
/*
@@ -1703,10 +1709,10 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
* corresponding to the provided path.
*/
static int nfs_request_mount(struct nfs_parsed_mount_data *args,
- struct nfs_fh *root_fh)
+ struct nfs_fh *root_fh,
+ rpc_authflavor_t *server_authlist,
+ unsigned int *server_authlist_len)
{
- rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
- unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
struct nfs_mount_request request = {
.sap = (struct sockaddr *)
&args->mount_server.address,
@@ -1714,7 +1720,7 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args,
.protocol = args->mount_server.protocol,
.fh = root_fh,
.noresvport = args->flags & NFS_MOUNT_NORESVPORT,
- .auth_flav_len = &server_authlist_len,
+ .auth_flav_len = server_authlist_len,
.auth_flavs = server_authlist,
.net = args->net,
};
@@ -1758,29 +1764,93 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args,
return status;
}
+ return 0;
+}
+
+static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
+{ /* Issues the MOUNT request, settles args->selected_flavor, then calls ->create_server(); returns the server or an ERR_PTR. */
+ int status;
+ unsigned int i;
+ bool tried_auth_unix = false;
+ bool auth_null_in_list = false;
+ struct nfs_server *server = ERR_PTR(-EACCES);
+ struct nfs_parsed_mount_data *args = mount_info->parsed;
+ rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
+ unsigned int authlist_len = ARRAY_SIZE(authlist); /* passed by pointer to nfs_request_mount() together with authlist */
+
+ status = nfs_request_mount(args, mount_info->mntfh, authlist,
+ &authlist_len);
+ if (status)
+ return ERR_PTR(status);
+
/*
- * MNTv1 (NFSv2) does not support auth flavor negotiation.
+ * Was a sec= authflavor specified in the options? First, verify
+ * whether the server supports it, and then just try to use it if so.
+ *
+ * NOTE(review): the dfprintk below runs before 'status' is checked,
+ * so when verification fails it still reports a flavor as being
+ * used right before the error is returned. On success
+ * nfs_verify_authflavors() has already logged the same message.
*/
- if (args->mount_server.version != NFS_MNT3_VERSION)
- return 0;
- return nfs_walk_authlist(args, &request);
+ if (args->auth_info.flavor_len > 0) {
+ status = nfs_verify_authflavors(args, authlist, authlist_len);
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n",
+ args->selected_flavor);
+ if (status)
+ return ERR_PTR(status);
+ return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ }
+
+ /*
+ * No sec= option was provided. RFC 2623, section 2.7 suggests we
+ * SHOULD prefer the flavor listed first. However, some servers list
+ * AUTH_NULL first. Avoid ever choosing AUTH_NULL.
+ *
+ * Each candidate is tried in list order; the first one for which
+ * ->create_server() succeeds wins.
+ */
+ for (i = 0; i < authlist_len; ++i) {
+ rpc_authflavor_t flavor;
+ struct rpcsec_gss_info info;
+
+ flavor = authlist[i];
+ switch (flavor) {
+ case RPC_AUTH_UNIX:
+ tried_auth_unix = true;
+ break;
+ case RPC_AUTH_NULL:
+ auth_null_in_list = true;
+ continue;
+ default:
+ if (rpcauth_get_gssinfo(flavor, &info) != 0)
+ continue; /* lookup failed: skip this flavor without trying it */
+ /* Fallthrough */
+ }
+ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
+ args->selected_flavor = flavor;
+ server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ if (!IS_ERR(server))
+ return server;
+ }
+
+ /*
+ * Nothing we tried so far worked. At this point, give up if we've
+ * already tried AUTH_UNIX or if the server's list doesn't contain
+ * AUTH_NULL
+ *
+ * 'server' holds the error from the last ->create_server() attempt,
+ * or the initial -EACCES if no flavor was attempted at all.
+ */
+ if (tried_auth_unix || !auth_null_in_list)
+ return server;
+
+ /* Last chance! Try AUTH_UNIX */
+ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
+ args->selected_flavor = RPC_AUTH_UNIX;
+ return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
}
struct dentry *nfs_try_mount(int flags, const char *dev_name,
struct nfs_mount_info *mount_info,
struct nfs_subversion *nfs_mod)
{
- int status;
struct nfs_server *server;
- if (mount_info->parsed->need_mount) {
- status = nfs_request_mount(mount_info->parsed, mount_info->mntfh);
- if (status)
- return ERR_PTR(status);
- }
+ if (mount_info->parsed->need_mount)
+ server = nfs_try_mount_request(mount_info, nfs_mod);
+ else
+ server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
- /* Get a volume representation */
- server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
if (IS_ERR(server))
return ERR_CAST(server);
@@ -1883,6 +1953,7 @@ static int nfs23_validate_mount_data(void *options,
{
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
+ int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
if (data == NULL)
goto out_no_data;
@@ -1898,6 +1969,8 @@ static int nfs23_validate_mount_data(void *options,
goto out_no_v3;
data->root.size = NFS2_FHSIZE;
memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+ /* Turn off security negotiation */
+ extra_flags |= NFS_MOUNT_SECFLAVOUR;
case 4:
if (data->flags & NFS_MOUNT_SECFLAVOUR)
goto out_no_sec;
@@ -1925,7 +1998,7 @@ static int nfs23_validate_mount_data(void *options,
* can deal with.
*/
args->flags = data->flags & NFS_MOUNT_FLAGMASK;
- args->flags |= NFS_MOUNT_LEGACY_INTERFACE;
+ args->flags |= extra_flags;
args->rsize = data->rsize;
args->wsize = data->wsize;
args->timeo = data->timeo;
@@ -1950,7 +2023,9 @@ static int nfs23_validate_mount_data(void *options,
args->bsize = data->bsize;
if (data->flags & NFS_MOUNT_SECFLAVOUR)
- args->auth_flavors[0] = data->pseudoflavor;
+ args->selected_flavor = data->pseudoflavor;
+ else
+ args->selected_flavor = RPC_AUTH_UNIX;
if (!args->nfs_server.hostname)
goto out_nomem;
@@ -2073,6 +2148,8 @@ static int nfs_validate_text_mount_data(void *options,
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
nfs_validate_transport_protocol(args);
+ if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
nfs4_validate_mount_flags(args);
#else
goto out_v4_not_compiled;
@@ -2082,9 +2159,6 @@ static int nfs_validate_text_mount_data(void *options,
nfs_set_port(sap, &args->nfs_server.port, port);
- if (args->auth_flavor_len > 1)
- goto out_bad_auth;
-
return nfs_parse_devname(dev_name,
&args->nfs_server.hostname,
max_namelen,
@@ -2095,26 +2169,40 @@ static int nfs_validate_text_mount_data(void *options,
out_v4_not_compiled:
dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
return -EPROTONOSUPPORT;
+#else
+out_invalid_transport_udp:
+ dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
+ return -EINVAL;
#endif /* !CONFIG_NFS_V4 */
out_no_address:
dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
return -EINVAL;
-
-out_bad_auth:
- dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n");
- return -EINVAL;
}
+#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+ | NFS_MOUNT_SECURE \
+ | NFS_MOUNT_TCP \
+ | NFS_MOUNT_VER3 \
+ | NFS_MOUNT_KERBEROS \
+ | NFS_MOUNT_NONLM \
+ | NFS_MOUNT_BROKEN_SUID \
+ | NFS_MOUNT_STRICTLOCK \
+ | NFS_MOUNT_UNSHARED \
+ | NFS_MOUNT_NORESVPORT \
+ | NFS_MOUNT_LEGACY_INTERFACE)
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
{
- if (data->flags != nfss->flags ||
+ if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
+ data->version != nfss->nfs_client->rpc_ops->version ||
+ data->minorversion != nfss->nfs_client->cl_minorversion ||
data->retrans != nfss->client->cl_timeout->to_retries ||
- data->auth_flavors[0] != nfss->client->cl_auth->au_flavor ||
+ data->selected_flavor != nfss->client->cl_auth->au_flavor ||
data->acregmin != nfss->acregmin / HZ ||
data->acregmax != nfss->acregmax / HZ ||
data->acdirmin != nfss->acdirmin / HZ ||
@@ -2139,6 +2227,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
u32 nfsvers = nfss->nfs_client->rpc_ops->version;
+ sync_filesystem(sb);
+
/*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
@@ -2159,7 +2249,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->rsize = nfss->rsize;
data->wsize = nfss->wsize;
data->retrans = nfss->client->cl_timeout->to_retries;
- data->auth_flavors[0] = nfss->client->cl_auth->au_flavor;
+ data->selected_flavor = nfss->client->cl_auth->au_flavor;
+ data->auth_info = nfss->auth_info;
data->acregmin = nfss->acregmin / HZ;
data->acregmax = nfss->acregmax / HZ;
data->acdirmin = nfss->acdirmin / HZ;
@@ -2167,12 +2258,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
data->nfs_server.port = nfss->port;
data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
+ data->version = nfsvers;
+ data->minorversion = nfss->nfs_client->cl_minorversion;
+ data->net = current->nsproxy->net_ns;
memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
data->nfs_server.addrlen);
/* overwrite those values with any that were specified */
- error = nfs_parse_mount_options((char *)options, data);
- if (error < 0)
+ error = -EINVAL;
+ if (!nfs_parse_mount_options((char *)options, data))
goto out;
/*
@@ -2276,7 +2370,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->nfs_client != b->nfs_client)
goto Ebusy;
- if (a->flags != b->flags)
+ if ((a->flags ^ b->flags) & NFS_MOUNT_CMP_FLAGMASK)
goto Ebusy;
if (a->wsize != b->wsize)
goto Ebusy;
@@ -2290,7 +2384,8 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
goto Ebusy;
if (a->acdirmax != b->acdirmax)
goto Ebusy;
- if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+ if (b->auth_info.flavor_len > 0 &&
+ clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
goto Ebusy;
return 1;
Ebusy:
@@ -2418,7 +2513,21 @@ static int nfs_bdi_register(struct nfs_server *server)
int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
struct nfs_mount_info *mount_info)
{
- return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
+ int error;
+ unsigned long kflags = 0, kflags_out = 0;
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+ kflags |= SECURITY_LSM_NATIVE_LABELS;
+
+ error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts,
+ kflags, &kflags_out);
+ if (error)
+ goto err;
+
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+ !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+ NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
+err:
+ return error;
}
EXPORT_SYMBOL_GPL(nfs_set_sb_security);
@@ -2426,10 +2535,9 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
struct nfs_mount_info *mount_info)
{
/* clone any lsm security options from the parent to the new sb */
- security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
return -ESTALE;
- return 0;
+ return security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
}
EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
@@ -2454,6 +2562,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
if (server->flags & NFS_MOUNT_NOAC)
sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+ if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
+ if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
+ sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
/* Get a superblock - note that we may end up sharing one that already exists */
s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
if (IS_ERR(s)) {
@@ -2470,6 +2582,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
mntroot = ERR_PTR(error);
goto error_splat_bdi;
}
+ server->super = s;
}
if (!s->s_root) {
@@ -2589,27 +2702,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
- int error;
- dprintk("--> nfs_xdev_mount_common()\n");
+ dprintk("--> nfs_xdev_mount()\n");
mount_info.mntfh = mount_info.cloned->fh;
/* create a new volume representation */
server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
- if (IS_ERR(server)) {
- error = PTR_ERR(server);
- goto out_err;
- }
- mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod);
- dprintk("<-- nfs_xdev_mount_common() = 0\n");
-out:
- return mntroot;
+ if (IS_ERR(server))
+ mntroot = ERR_CAST(server);
+ else
+ mntroot = nfs_fs_mount_common(server, flags,
+ dev_name, &mount_info, nfs_mod);
-out_err:
- dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error);
- goto out;
+ dprintk("<-- nfs_xdev_mount() = %ld\n",
+ IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
+ return mntroot;
}
#if IS_ENABLED(CONFIG_NFS_V4)
@@ -2650,13 +2759,16 @@ static int nfs4_validate_mount_data(void *options,
args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
if (data->auth_flavourlen) {
+ rpc_authflavor_t pseudoflavor;
if (data->auth_flavourlen > 1)
goto out_inval_auth;
- if (copy_from_user(&args->auth_flavors[0],
+ if (copy_from_user(&pseudoflavor,
data->auth_flavours,
- sizeof(args->auth_flavors[0])))
+ sizeof(pseudoflavor)))
return -EFAULT;
- }
+ args->selected_flavor = pseudoflavor;
+ } else
+ args->selected_flavor = RPC_AUTH_UNIX;
c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
if (IS_ERR(c))
@@ -2690,6 +2802,8 @@ static int nfs4_validate_mount_data(void *options,
args->acdirmax = data->acdirmax;
args->nfs_server.protocol = data->proto;
nfs_validate_transport_protocol(args);
+ if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
break;
default:
@@ -2710,6 +2824,10 @@ out_inval_auth:
out_no_address:
dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
return -EINVAL;
+
+out_invalid_transport_udp:
+ dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
+ return -EINVAL;
}
/*
@@ -2725,6 +2843,7 @@ bool nfs4_disable_idmapping = true;
unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
unsigned short send_implementation_id = 1;
char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
+bool recover_lost_locks = false;
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
@@ -2733,6 +2852,7 @@ EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
EXPORT_SYMBOL_GPL(max_session_slots);
EXPORT_SYMBOL_GPL(send_implementation_id);
EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
+EXPORT_SYMBOL_GPL(recover_lost_locks);
#define NFS_CALLBACK_MAXPORTNR (65535U)
@@ -2769,6 +2889,11 @@ module_param(send_implementation_id, ushort, 0644);
MODULE_PARM_DESC(send_implementation_id,
"Send implementation ID with NFSv4.1 exchange_id");
MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
-MODULE_ALIAS("nfs4");
+
+module_param(recover_lost_locks, bool, 0644);
+MODULE_PARM_DESC(recover_lost_locks,
+ "If the server reports that a lock might be lost, "
+ "try to recover it risking data corruption.");
+
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 6b3f2535a3e..bb6ed810fa6 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -13,7 +13,7 @@
static struct ctl_table_header *nfs_callback_sysctl_table;
-static ctl_table nfs_cb_sysctls[] = {
+static struct ctl_table nfs_cb_sysctls[] = {
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
@@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_dir[] = {
+static struct ctl_table nfs_cb_sysctl_dir[] = {
{
.procname = "nfs",
.mode = 0555,
@@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = {
{ }
};
-static ctl_table nfs_cb_sysctl_root[] = {
+static struct ctl_table nfs_cb_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b..de54129336c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,12 +14,15 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/namei.h>
+#include <linux/fsnotify.h>
#include "internal.h"
#include "nfs4_fs.h"
#include "iostat.h"
#include "delegation.h"
+#include "nfstrace.h"
+
/**
* nfs_free_unlinkdata - release data from a sillydelete operation.
* @data: pointer to unlink structure.
@@ -77,6 +80,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
struct nfs_unlinkdata *data = calldata;
struct inode *dir = data->dir;
+ trace_nfs_sillyrename_unlink(data, task->tk_status);
if (!NFS_PROTO(dir)->unlink_done(task, dir))
rpc_restart_call_prepare(task);
}
@@ -95,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata)
nfs_dec_sillycount(data->dir);
nfs_free_unlinkdata(data);
- nfs_sb_deactive_async(sb);
+ nfs_sb_deactive(sb);
}
static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -204,6 +208,13 @@ out_free:
return ret;
}
+void nfs_wait_on_sillyrename(struct dentry *dentry)
+{
+ struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+
+ wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
+}
+
void nfs_block_sillyrename(struct dentry *dentry)
{
struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
@@ -268,8 +279,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
* point dentry is definitely not a root, so we won't need
* that anymore.
*/
- if (devname_garbage)
- kfree(devname_garbage);
+ kfree(devname_garbage);
return 0;
out_unlock:
spin_unlock(&dentry->d_lock);
@@ -336,20 +346,16 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
struct inode *old_dir = data->old_dir;
struct inode *new_dir = data->new_dir;
struct dentry *old_dentry = data->old_dentry;
- struct dentry *new_dentry = data->new_dentry;
+ trace_nfs_sillyrename_rename(old_dir, old_dentry,
+ new_dir, data->new_dentry, task->tk_status);
if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
rpc_restart_call_prepare(task);
return;
}
- if (task->tk_status != 0) {
- nfs_cancel_async_unlink(old_dentry);
- return;
- }
-
- d_drop(old_dentry);
- d_drop(new_dentry);
+ if (data->complete)
+ data->complete(task, data);
}
/**
@@ -394,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = {
*
* It's expected that valid references to the dentries and inodes are held
*/
-static struct rpc_task *
+struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
- struct dentry *old_dentry, struct dentry *new_dentry)
+ struct dentry *old_dentry, struct dentry *new_dentry,
+ void (*complete)(struct rpc_task *, struct nfs_renamedata *))
{
struct nfs_renamedata *data;
struct rpc_message msg = { };
@@ -433,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
data->new_dentry = dget(new_dentry);
nfs_fattr_init(&data->old_fattr);
nfs_fattr_init(&data->new_fattr);
+ data->complete = complete;
/* set up nfs_renameargs */
data->args.old_dir = NFS_FH(old_dir);
@@ -451,6 +459,35 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
return rpc_run_task(&task_setup_data);
}
+/*
+ * Perform tasks needed when a sillyrename is done such as cancelling the
+ * queued async unlink if it failed.
+ */
+static void
+nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ struct dentry *dentry = data->old_dentry;
+
+ if (task->tk_status != 0) {
+ nfs_cancel_async_unlink(dentry);
+ return;
+ }
+
+ /*
+ * vfs_unlink and the like do not issue this when a file is
+ * sillyrenamed, so do it here.
+ */
+ fsnotify_nameremove(dentry, 0);
+}
+
+#define SILLYNAME_PREFIX ".nfs"
+#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
+#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
+#define SILLYNAME_COUNTER_LEN ((unsigned)sizeof(unsigned int) << 1)
+#define SILLYNAME_LEN (SILLYNAME_PREFIX_LEN + \
+ SILLYNAME_FILEID_LEN + \
+ SILLYNAME_COUNTER_LEN)
+
/**
* nfs_sillyrename - Perform a silly-rename of a dentry
* @dir: inode of directory that contains dentry
@@ -476,43 +513,39 @@ int
nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter;
- const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
- const int countersize = sizeof(sillycounter)*2;
- const int slen = sizeof(".nfs")+fileidsize+countersize-1;
- char silly[slen+1];
+ unsigned char silly[SILLYNAME_LEN + 1];
+ unsigned long long fileid;
struct dentry *sdentry;
struct rpc_task *task;
- int error = -EIO;
+ int error = -EBUSY;
- dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- dentry->d_count);
+ dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
+ dentry, d_count(dentry));
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
* We don't allow a dentry to be silly-renamed twice.
*/
- error = -EBUSY;
if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
goto out;
- sprintf(silly, ".nfs%*.*Lx",
- fileidsize, fileidsize,
- (unsigned long long)NFS_FILEID(dentry->d_inode));
+ fileid = NFS_FILEID(dentry->d_inode);
/* Return delegation in anticipation of the rename */
NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
sdentry = NULL;
do {
- char *suffix = silly + slen - countersize;
-
+ int slen;
dput(sdentry);
sillycounter++;
- sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
+ slen = scnprintf(silly, sizeof(silly),
+ SILLYNAME_PREFIX "%0*llx%0*x",
+ SILLYNAME_FILEID_LEN, fileid,
+ SILLYNAME_COUNTER_LEN, sillycounter);
- dfprintk(VFS, "NFS: trying to rename %s to %s\n",
- dentry->d_name.name, silly);
+ dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
+ dentry, silly);
sdentry = lookup_one_len(silly, dentry->d_parent, slen);
/*
@@ -539,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
}
/* run the rename task, undo unlink if it fails */
- task = nfs_async_rename(dir, dir, dentry, sdentry);
+ task = nfs_async_rename(dir, dir, dentry, sdentry,
+ nfs_complete_sillyrename);
if (IS_ERR(task)) {
error = -EBUSY;
nfs_cancel_async_unlink(dentry);
@@ -550,6 +584,18 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
error = rpc_wait_for_completion_task(task);
if (error == 0)
error = task->tk_status;
+ switch (error) {
+ case 0:
+ /* The rename succeeded */
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ d_move(dentry, sdentry);
+ break;
+ case -ERESTARTSYS:
+ /* The result of the rename is unknown. Play it safe by
+ * forcing a new lookup */
+ d_drop(dentry);
+ d_drop(sdentry);
+ }
rpc_put_task(task);
out_dput:
dput(sdentry);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c483cc50b82..5e2f1030454 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -31,6 +31,8 @@
#include "fscache.h"
#include "pnfs.h"
+#include "nfstrace.h"
+
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
#define MIN_POOL_WRITE (32)
@@ -40,10 +42,11 @@
* Local function declarations
*/
static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -68,76 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-struct nfs_write_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
{
- struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
-
- if (p) {
- struct nfs_pgio_header *hdr = &p->header;
+ struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+ if (p)
memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- hdr->verf = &p->verf;
- }
return p;
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
{
- struct nfs_write_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
-{
- struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
mempool_free(whdr, nfs_wdata_mempool);
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
-
-void nfs_writedata_release(struct nfs_write_data *wdata)
-{
- struct nfs_pgio_header *hdr = wdata->header;
- struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
-
- put_nfs_open_context(wdata->args.context);
- if (wdata->pages.pagevec != wdata->pages.page_array)
- kfree(wdata->pages.pagevec);
- if (wdata == &write_header->rpc_data) {
- wdata->header = NULL;
- wdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(wdata);
-}
-EXPORT_SYMBOL_GPL(nfs_writedata_release);
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
@@ -146,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
@@ -159,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
- req = freq;
+ req = freq->wb_head;
break;
}
}
}
- if (req)
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+
kref_get(&req->wb_kref);
+ }
return req;
}
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -209,18 +170,78 @@ static void nfs_set_pageerror(struct page *page)
nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}
+/*
+ * nfs_page_group_search_locked
+ * @head - head request of page group
+ * @page_offset - offset into page
+ *
+ * Search page group with head @head to find a request that contains the
+ * page offset @page_offset.
+ *
+ * Returns a pointer to the first matching nfs request, or NULL if no
+ * match is found.
+ *
+ * Must be called with the page group lock held
+ */
+static struct nfs_page *
+nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
+{
+ struct nfs_page *req;
+
+ WARN_ON_ONCE(head != head->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
+
+ req = head;
+ do {
+ if (page_offset >= req->wb_pgbase &&
+ page_offset < (req->wb_pgbase + req->wb_bytes))
+ return req;
+
+ req = req->wb_this_page;
+ } while (req != head);
+
+ return NULL;
+}
+
+/*
+ * nfs_page_group_covers_page
+ * @head - head request of page group
+ *
+ * Return true if the page group with head @head covers the whole page,
+ * returns false otherwise
+ */
+static bool nfs_page_group_covers_page(struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+ unsigned int pos = 0;
+ unsigned int len = nfs_page_length(req->wb_page);
+
+ nfs_page_group_lock(req);
+
+ do {
+ tmp = nfs_page_group_search_locked(req->wb_head, pos);
+ if (tmp) {
+ /* no way this should happen */
+ WARN_ON_ONCE(tmp->wb_pgbase != pos);
+ pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
+ }
+ } while (tmp && pos < len);
+
+ nfs_page_group_unlock(req);
+ WARN_ON_ONCE(pos > len);
+ return pos == len;
+}
+
/* We can set the PG_uptodate flag if we see that a write request
* covers the full page.
*/
-static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+static void nfs_mark_uptodate(struct nfs_page *req)
{
- if (PageUptodate(page))
- return;
- if (base != 0)
+ if (PageUptodate(req->wb_page))
return;
- if (count != nfs_page_length(page))
+ if (!nfs_page_group_covers_page(req))
return;
- SetPageUptodate(page);
+ SetPageUptodate(req->wb_page);
}
static int wb_priority(struct writeback_control *wbc)
@@ -256,46 +277,259 @@ static void nfs_set_page_writeback(struct page *page)
}
}
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = page_file_mapping(req->wb_page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
- end_page_writeback(page);
+ if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+ return;
+
+ end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ * @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+ clear_bit(PG_TEARDOWN, &req->wb_flags);
+ clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+ clear_bit(PG_UPTODATE, &req->wb_flags);
+ clear_bit(PG_WB_END, &req->wb_flags);
+ clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head - head request of page group, must be holding head lock
+ * @req - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ * and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req, bool nonblock)
+ __releases(&inode->i_lock)
+{
+ struct nfs_page *tmp;
+ int ret;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+ nfs_unlock_request(tmp);
+
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+ /* grab a ref on the request that will be waited on */
+ kref_get(&req->wb_kref);
+
+ nfs_page_group_unlock(head);
+ spin_unlock(&inode->i_lock);
+
+ /* release ref from nfs_page_find_head_request_locked */
+ nfs_release_request(head);
+
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
+ nfs_release_request(req);
+
+ return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+ struct nfs_page *old_head)
+{
+ while (destroy_list) {
+ struct nfs_page *subreq = destroy_list;
+
+ destroy_list = (subreq->wb_this_page == old_head) ?
+ NULL : subreq->wb_this_page;
+
+ WARN_ON_ONCE(old_head != subreq->wb_head);
+
+ /* make sure old group is not used */
+ subreq->wb_head = subreq;
+ subreq->wb_this_page = subreq;
+
+ nfs_clear_request_commit(subreq);
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_request(subreq);
+
+ if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ /* release ref on old head request */
+ nfs_release_request(old_head);
+
+ nfs_page_group_clear_bits(subreq);
+
+ /* release the PG_INODE_REF reference */
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ nfs_release_request(subreq);
+ else
+ WARN_ON_ONCE(1);
+ } else {
+ WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+ /* zombie requests have already released the last
+ * reference and were waiting on the rest of the
+ * group to complete. Since it's no longer part of a
+ * group, simply free the request */
+ nfs_page_group_clear_bits(subreq);
+ nfs_free_request(subreq);
+ }
+ }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ * a locked reference, cancelling any pending
+ * operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *head, *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int total_bytes;
int ret;
+try_again:
+ total_bytes = 0;
+
+ WARN_ON_ONCE(destroy_list);
+
spin_lock(&inode->i_lock);
- for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req))
- break;
- /* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request() will always
- * succeed provided that someone hasn't already marked the
- * request as dirty (in which case we don't care).
- */
+
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+ if (!head) {
spin_unlock(&inode->i_lock);
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
- if (ret != 0)
+ return NULL;
+ }
+
+ /* lock each request in the page group */
+ nfs_page_group_lock(head);
+ subreq = head;
+ do {
+ /*
+ * Subrequests are always contiguous, non overlapping
+ * and in order. If not, it's a programming error.
+ */
+ WARN_ON_ONCE(subreq->wb_offset !=
+ (head->wb_offset + total_bytes));
+
+ /* keep track of how many bytes this group covers */
+ total_bytes += subreq->wb_bytes;
+
+ if (!nfs_lock_request(subreq)) {
+ /* releases page group bit lock and
+ * inode spin lock and all references */
+ ret = nfs_unroll_locks_and_wait(inode, head,
+ subreq, nonblock);
+
+ if (ret == 0)
+ goto try_again;
+
return ERR_PTR(ret);
- spin_lock(&inode->i_lock);
+ }
+
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_list_remove_request(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+
+ /* change head request to cover whole range that
+ * the former page group covered */
+ head->wb_bytes = total_bytes;
}
+
+ /*
+ * prepare head request to be added to new pgio descriptor
+ */
+ nfs_page_group_clear_bits(head);
+
+ /*
+ * some part of the group was still on the inode list - otherwise
+ * the group wouldn't be involved in async write.
+ * grab a reference for the head request, iff it needs one.
+ */
+ if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ kref_get(&head->wb_kref);
+
+ nfs_page_group_unlock(head);
+
+ /* drop lock to clear_request_commit the head req and clean up
+ * requests on destroy list */
spin_unlock(&inode->i_lock);
- return req;
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+ /* clean up commit list state */
+ nfs_clear_request_commit(head);
+
+ /* still holds ref on head from nfs_page_find_head_request_locked
+ * and still has lock on head from lock loop */
+ return head;
}
/*
@@ -308,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page, nonblock);
+ req = nfs_lock_and_join_requests(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -352,10 +586,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
- NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
- page->mapping->host,
- wb_priority(wbc),
- &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+ false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -398,12 +630,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+ &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(bitlock, NFS_INO_FLUSHING);
if (err < 0)
@@ -423,6 +656,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
/* Lock the request! */
nfs_lock_request(req);
@@ -439,6 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
+ /* this is a head request for a page group - mark it as having an
+ * extra reference so sub groups can follow suit */
+ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -450,16 +688,23 @@ static void nfs_inode_remove_request(struct nfs_page *req)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *head;
- spin_lock(&inode->i_lock);
- if (likely(!PageSwapCache(req->wb_page))) {
- set_page_private(req->wb_page, 0);
- ClearPagePrivate(req->wb_page);
- clear_bit(PG_MAPPED, &req->wb_flags);
+ if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+ head = req->wb_head;
+
+ spin_lock(&inode->i_lock);
+ if (likely(!PageSwapCache(head->wb_page))) {
+ set_page_private(head->wb_page, 0);
+ ClearPagePrivate(head->wb_page);
+ clear_bit(PG_MAPPED, &head->wb_flags);
+ }
+ nfsi->npages--;
+ spin_unlock(&inode->i_lock);
}
- nfsi->npages--;
- spin_unlock(&inode->i_lock);
- nfs_release_request(req);
+
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ nfs_release_request(req);
}
static void
@@ -581,7 +826,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
return data->header->lseg == NULL;
@@ -612,7 +857,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
return 0;
}
@@ -643,7 +888,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
- memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+ memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
@@ -651,7 +896,7 @@ remove_req:
nfs_inode_remove_request(req);
next:
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
nfs_release_request(req);
}
out:
@@ -659,7 +904,7 @@ out:
}
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-static unsigned long
+unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return cinfo->mds->ncommit;
@@ -716,7 +961,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
}
#else
-static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -752,10 +997,14 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
+ /* should be handled by nfs_flush_incompatible */
+ WARN_ON_ONCE(req->wb_head != req);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
rqend = req->wb_offset + req->wb_bytes;
/*
* Tell the caller to flush out the request if
@@ -817,7 +1066,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, inode, page, offset, bytes);
+ req = nfs_create_request(ctx, page, NULL, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -835,7 +1084,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
return PTR_ERR(req);
/* Update file length */
nfs_grow_file(page, offset, count);
- nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
@@ -856,12 +1105,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_request(page);
+ req = nfs_page_find_head_request(page);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx;
- if (l_ctx) {
+ /* for now, flush if more than 1 request in page_group */
+ do_flush |= req->wb_this_page != req;
+ if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid;
}
@@ -874,20 +1125,77 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
}
 /*
+ * Avoid buffered writes when an open context credential's key would
+ * expire soon.
+ *
+ * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
+ *
+ * Returns 0 and sets a credential flag which triggers the inode to flush
+ * and perform NFS_FILE_SYNC writes if the key will expire within
+ * RPC_KEY_EXPIRE_TIMEO.
+ */
+int
+nfs_key_timeout_notify(struct file *filp, struct inode *inode)
+{
+ struct nfs_open_context *ctx = nfs_file_open_context(filp);
+ struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
+
+ return rpcauth_key_timeout_notify(auth, ctx->cred); /* result is passed through unchanged */
+}
+
+/*
+ * Test if the open context credential key is marked to expire soon.
+ */
+bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
+{
+ return rpcauth_cred_key_to_expire(ctx->cred);
+}
+
+/*
* If the page cache is marked as unsafe or invalid, then we can't rely on
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
+ struct nfs_inode *nfsi = NFS_I(inode);
+
if (nfs_have_delegated_attributes(inode))
goto out;
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE))
+ if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ return false;
+ smp_rmb();
+ if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
return false;
out:
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ return false;
return PageUptodate(page) != 0;
}
+/* If we know the page is up to date, and we're not using byte range locks (or
+ * if we have the whole file locked for writing), it may be more efficient to
+ * extend the write to cover the entire page in order to avoid fragmentation
+ * inefficiencies.
+ *
+ * If the file is opened for synchronous writes then we can just skip the rest
+ * of the checks. Returns 1 when the write may be extended, 0 otherwise.
+ */
+static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
+{
+ if (file->f_flags & O_DSYNC)
+ return 0; /* synchronous open: never extend */
+ if (!nfs_write_pageuptodate(page, inode))
+ return 0; /* page contents cannot be relied upon */
+ if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+ return 1; /* write delegation held: lock state need not be examined */
+ if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
+ inode->i_flock->fl_end == OFFSET_MAX &&
+ inode->i_flock->fl_type != F_RDLCK))
+ return 1; /* no lock in i_flock, or it spans [0, OFFSET_MAX] and is not F_RDLCK */
+ return 0;
+}
+
/*
* Update and possibly write a cached page of an NFS file.
*
@@ -903,19 +1211,10 @@ int nfs_updatepage(struct file *file, struct page *page,
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
- dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name, count,
- (long long)(page_file_offset(page) + offset));
+ dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
+ file, count, (long long)(page_file_offset(page) + offset));
- /* If we're not using byte range locks, and we know the page
- * is up to date, it may be more efficient to extend the write
- * to cover the entire page in order to avoid fragmentation
- * inefficiencies.
- */
- if (nfs_write_pageuptodate(page, inode) &&
- inode->i_flock == NULL &&
- !(file->f_flags & O_DSYNC)) {
+ if (nfs_can_extend_write(file, page, inode)) {
count = max(count + offset, nfs_page_length(page));
offset = 0;
}
@@ -942,123 +1241,17 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = clnt,
- .task = &data->task,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | flags,
- .priority = priority,
- };
- int ret = 0;
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- dprintk("NFS: %5u initiated write call "
- "(req %s/%lld, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ task_setup_data->priority = priority;
+ NFS_PROTO(inode)->write_setup(data, msg);
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
- rpc_put_task(task);
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
-
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
- unsigned int count, unsigned int offset,
- int how, struct nfs_commit_info *cinfo)
-{
- struct nfs_page *req = data->header->req;
-
- /* Set up the RPC argument and reply structs
- * NB: take care not to mess about with data->commit et al. */
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- /* pnfs_set_layoutcommit needs this */
- data->mds_offset = data->args.offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
- data->args.stable = NFS_UNSTABLE;
- switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
- case 0:
- break;
- case FLUSH_COND_STABLE:
- if (nfs_reqs_to_commit(cinfo))
- break;
- default:
- data->args.stable = NFS_FILE_SYNC;
- }
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_write(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
-}
-
-static int nfs_do_multiple_writes(struct list_head *head,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct nfs_write_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_write(data, call_ops, how);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
+ &task_setup_data->rpc_client, msg, data);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1069,7 +1262,7 @@ static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1089,170 +1282,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.completion = nfs_write_completion,
};
-static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_write_data, list);
- list_del(&data->list);
- nfs_writedata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple small requests to write out a single
- * contiguous dirty area on one page.
- */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_write_data *data;
- size_t wsize = desc->pg_bsize, nbytes;
- unsigned int offset;
- int requests = 0;
- struct nfs_commit_info cinfo;
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
- desc->pg_count > wsize))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes, wsize);
-
- data = nfs_writedata_alloc(hdr, 1);
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- requests++;
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-/*
- * Create an RPC task for the given write request and kick it.
- * The page must have been locked by the caller.
- *
- * It may happen that the page we're passed is not marked dirty.
- * This is the case if nfs_updatepage detects a conflicting request
- * that has been written but not committed.
- */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_write_data *data;
- struct list_head *head = &desc->pg_list;
- struct nfs_commit_info cinfo;
-
- data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
- /* Set up the argument struct */
- nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc, hdr);
- return nfs_flush_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_flush);
-
-static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_write_header *whdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- whdr = nfs_writehdr_alloc();
- if (!whdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &whdr->header;
- nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&hdr->rpc_list,
- desc->pg_rpc_callops,
- desc->pg_ioflags);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_write_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
-};
-
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
- NFS_SERVER(inode)->wsize, ioflags);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+ server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
- NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
-}
-
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_commit_data *data = calldata;
@@ -1260,23 +1313,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- * writebacks since the page->count is kept > 1 for as long
- * as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_common(void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
@@ -1285,34 +1323,46 @@ static void nfs_writeback_release_common(void *calldata)
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
; /* Do nothing */
else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
- memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
- else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+ memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+ else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
spin_unlock(&hdr->lock);
}
- nfs_writedata_release(data);
}
-static const struct rpc_call_ops nfs_write_common_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_common,
- .rpc_release = nfs_writeback_release_common,
-};
+/* Special version of should_remove_suid() that ignores capabilities.
+ * Returns the ATTR_KILL_* bits that apply to a regular file, else 0.
+ */
+static int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
 /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode))) /* bits are reported only for regular files */
+ return kill;
+
+ return 0;
+}
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct nfs_writeargs *argp = &data->args;
- struct nfs_writeres *resp = &data->res;
- struct inode *inode = data->header->inode;
int status;
- dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
- task->tk_pid, task->tk_status);
-
/*
* ->write_done will attempt to use post-op attributes to detect
* conflicting writes by other clients. A strict interpretation
@@ -1322,11 +1372,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
- return;
- nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+ return status;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
- if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1342,18 +1392,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
- resp->verf->committed, argp->stable);
+ data->res.verf->committed, data->args.stable);
complain = jiffies + 300 * HZ;
}
}
#endif
- if (task->tk_status < 0)
- nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
- else if (resp->count < argp->count) {
+
+ /* Deal with the suid/sgid bit corner case */
+ if (nfs_should_remove_suid(inode))
+ nfs_mark_for_revalidate(inode);
+ return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
+
+ if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
- nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1404,7 +1467,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
clear_bit(NFS_INO_COMMIT, &nfsi->flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
@@ -1441,6 +1504,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+ nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
+ NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -1555,9 +1621,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
nfs_list_remove_request(req);
nfs_clear_page_commit(req->wb_page);
- dprintk("NFS: commit (%s/%lld %d@%lld)",
+ dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
@@ -1715,8 +1781,14 @@ int nfs_wb_all(struct inode *inode)
.range_start = 0,
.range_end = LLONG_MAX,
};
+ int ret;
+
+ trace_nfs_writeback_inode_enter(inode);
+
+ ret = sync_inode(inode, &wbc);
- return sync_inode(inode, &wbc);
+ trace_nfs_writeback_inode_exit(inode, ret);
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_wb_all);
@@ -1725,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
struct nfs_page *req;
int ret = 0;
- for (;;) {
- wait_on_page_writeback(page);
- req = nfs_page_find_request(page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req)) {
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_and_release_request(req);
- break;
- }
- ret = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (ret < 0)
- break;
+ wait_on_page_writeback(page);
+
+ /* blocking call to cancel all requests and join to a single (head)
+ * request */
+ req = nfs_lock_and_join_requests(page, false);
+
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ } else if (req) {
+ /* all requests from this page have been cancelled by
+ * nfs_lock_and_join_requests, so just remove the head
+ * request from the inode / page_private pointer and
+ * release it */
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_and_release_request(req);
}
+
return ret;
}
@@ -1764,6 +1837,8 @@ int nfs_wb_page(struct inode *inode, struct page *page)
};
int ret;
+ trace_nfs_writeback_page_enter(inode);
+
for (;;) {
wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) {
@@ -1772,14 +1847,15 @@ int nfs_wb_page(struct inode *inode, struct page *page)
goto out_error;
continue;
}
+ ret = 0;
if (!PagePrivate(page))
break;
ret = nfs_commit_inode(inode, FLUSH_SYNC);
if (ret < 0)
goto out_error;
}
- return 0;
out_error:
+ trace_nfs_writeback_page_exit(inode, ret);
return ret;
}
@@ -1808,7 +1884,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_write_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
@@ -1870,3 +1946,12 @@ void nfs_destroy_writepagecache(void)
kmem_cache_destroy(nfs_wdata_cachep);
}
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+ .rw_mode = FMODE_WRITE,
+ .rw_alloc_header = nfs_writehdr_alloc,
+ .rw_free_header = nfs_writehdr_free,
+ .rw_release = nfs_writeback_release_common,
+ .rw_done = nfs_writeback_done,
+ .rw_result = nfs_writeback_result,
+ .rw_initiate = nfs_initiate_write,
+};