diff options
Diffstat (limited to 'fs/nfs')
66 files changed, 10558 insertions, 4822 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 13ca196385f..3dece03f2fc 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -104,20 +104,29 @@ config NFS_V4_1 If unsure, say N. +config NFS_V4_2 + bool "NFS client support for NFSv4.2" + depends on NFS_V4_1 + help + This option enables support for minor version 2 of the NFSv4 protocol + in the kernel's NFS client. + + If unsure, say N. + config PNFS_FILE_LAYOUT tristate depends on NFS_V4_1 - default m + default NFS_V4 config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM - default m + default NFS_V4 config PNFS_OBJLAYOUT tristate depends on NFS_V4_1 && SCSI_OSD_ULD - default m + default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" @@ -131,6 +140,22 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN If the NFS client is unchanged from the upstream kernel, this option should be set to the default "kernel.org". +config NFS_V4_1_MIGRATION + bool "NFSv4.1 client support for migration" + depends on NFS_V4_1 + default n + help + This option makes the NFS client advertise to NFSv4.1 servers that + it can support NFSv4 migration. + + The NFSv4.1 pieces of the Linux NFSv4 migration implementation are + still experimental. If you are not an NFSv4 developer, say N here. + +config NFS_V4_SECURITY_LABEL + bool + depends on NFS_V4_2 && SECURITY + default y + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index cce2c057bd2..4782e0840dc 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -4,10 +4,10 @@ obj-$(CONFIG_NFS_FS) += nfs.o +CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ direct.o pagelist.o read.o symlink.o unlink.o \ - write.o namespace.o mount_clnt.o \ - dns_resolve.o cache_lib.o + write.o namespace.o mount_clnt.o nfstrace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o @@ -20,14 +20,15 @@ nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o obj-$(CONFIG_NFS_V4) += nfsv4.o +CFLAGS_nfs4trace.o += -I$(src) nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ - nfs4namespace.o nfs4getroot.o nfs4client.o + nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \ + dns_resolve.o nfs4trace.o +nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfsv4-$(CONFIG_NFS_V4_1) += nfs4session.o pnfs.o pnfs_dev.o - -obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o -nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 4fa788c93f4..9b431f44fad 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -134,8 +134,8 @@ bl_submit_bio(int rw, struct bio *bio) if (bio) { get_parallel(bio->bi_private); dprintk("%s submitting %s bio %u@%llu\n", __func__, - rw == READ ? "read" : "write", - bio->bi_size, (unsigned long long)bio->bi_sector); + rw == READ ? "read" : "write", bio->bi_iter.bi_size, + (unsigned long long)bio->bi_iter.bi_sector); submit_bio(rw, bio); } return NULL; @@ -156,7 +156,8 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, } if (bio) { - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_iter.bi_sector = isect - be->be_f_offset + + be->be_v_offset; bio->bi_bdev = be->be_mdev; bio->bi_end_io = end_io; bio->bi_private = par; @@ -201,19 +202,15 @@ static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, static void bl_end_io_read(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec; + int i; - do { - struct page *page = bvec->bv_page; + if (!err) + bio_for_each_segment_all(bvec, bio, i) + SetPageUptodate(bvec->bv_page); - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - if (uptodate) - SetPageUptodate(page); - } while (bvec >= bio->bi_io_vec); - if (!uptodate) { - struct nfs_read_data *rdata = par->data; + if (err) { + struct nfs_pgio_data *rdata = par->data; struct nfs_pgio_header *header = rdata->header; if (!header->pnfs_error) @@ -227,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rdata->task.tk_status = rdata->header->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); @@ -245,7 +242,7 @@ bl_end_par_io_read(void *data, int unused) } static enum pnfs_try_status -bl_read_pagelist(struct nfs_read_data *rdata) +bl_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *header = rdata->header; int i, hole; @@ -383,21 +380,17 @@ static void mark_extents_written(struct pnfs_block_layout *bl, static void bl_end_io_write_zero(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - - do { - struct page *page = bvec->bv_page; + struct bio_vec *bvec; + int i; - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); + bio_for_each_segment_all(bvec, bio, i) { /* This is the zeroing page we added */ - end_page_writeback(page); - page_cache_release(page); - } while (bvec >= bio->bi_io_vec); + end_page_writeback(bvec->bv_page); + page_cache_release(bvec->bv_page); + } - if (unlikely(!uptodate)) { - struct nfs_write_data *data = par->data; + if (unlikely(err)) { + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!header->pnfs_error) @@ -412,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_write_data *data = par->data; + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!uptodate) { @@ -430,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); if (likely(!wdata->header->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), @@ -445,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work) /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (unlikely(wdata->header->pnfs_error)) { bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, @@ -519,7 +512,7 @@ bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + (offset / SECTOR_SIZE); - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset; bio->bi_bdev = be->be_mdev; bio->bi_end_io = bl_read_single_end_io; @@ -680,7 +673,7 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_write_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) { struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; @@ -1089,9 +1082,10 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, dev->pgbase = 0; dev->pglen = PAGE_SIZE * max_pages; dev->mincount = 0; + dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); - rc = nfs4_proc_getdeviceinfo(server, dev); + rc = nfs4_proc_getdeviceinfo(server, dev, NULL); dprintk("%s getdevice info returns %d\n", __func__, rc); if (rc) { rv = ERR_PTR(rc); @@ -1195,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) pnfs_generic_pg_init_read(pgio, req); } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, SECTOR_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } @@ -1219,7 +1217,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); if (end != NFS_I(inode)->npages) { rcu_read_lock(); - end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); } @@ -1247,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, PAGE_CACHE_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } @@ -1273,6 +1275,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { static struct pnfs_layoutdriver_type blocklayout_type = { .id = LAYOUT_BLOCK_VOLUME, .name = "LAYOUT_BLOCK_VOLUME", + .owner = THIS_MODULE, .read_pagelist = bl_read_pagelist, .write_pagelist = bl_write_pagelist, .alloc_layout_hdr = bl_alloc_layout_hdr, diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index f4891bde885..9838fb02047 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -36,6 +36,7 @@ #include <linux/nfs_fs.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include "../nfs4_fs.h" #include "../pnfs.h" #include "../netns.h" @@ -173,7 +174,7 @@ struct bl_msg_hdr { /* blocklayoutdev.c */ ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); void bl_pipe_destroy_msg(struct rpc_pipe_msg *); -int nfs4_blkdev_put(struct block_device *bdev); +void nfs4_blkdev_put(struct block_device *bdev); struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, struct pnfs_device *dev); int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index a86c5bdad9e..04303b5c936 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -56,11 +56,11 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) /* * Release the block device */ -int nfs4_blkdev_put(struct block_device *bdev) +void nfs4_blkdev_put(struct block_device *bdev) { dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); - return blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, FMODE_READ); } ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17..8999cfddd86 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + msg->len = sizeof(bl_msg) + bl_msg.totallen; + msg->data = kzalloc(msg->len, GFP_NOFS); if (!msg->data) goto out; @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { @@ -88,14 +88,8 @@ out: */ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) { - int rv; - dprintk("%s Releasing\n", __func__); - rv = nfs4_blkdev_put(bdev->bm_mdev); - if (rv) - printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n", - __func__, rv); - + nfs4_blkdev_put(bdev->bm_mdev); dev_remove(bdev->net, bdev->bm_mdev->bd_dev); } diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed..4d016144256 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - do_div(tmp, base); + return s - sector_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 862a2f16db6..5f7b053720e 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) struct super_block *pipefs_sb; int ret = 0; + sunrpc_init_cache_detail(cd); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { ret = nfs_cache_register_sb(pipefs_sb, cd); rpc_put_sb_net(net); + if (ret) + sunrpc_destroy_cache_detail(cd); } return ret; } @@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - sunrpc_init_cache_detail(cd); -} - -void nfs_cache_destroy(struct cache_detail *cd) -{ sunrpc_destroy_cache_detail(cd); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 317db95e37f..4116d2c3f52 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); extern int nfs_cache_register_sb(struct super_block *sb, diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 5088b57b078..073b4cf67ed 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -125,6 +125,9 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { @@ -161,8 +164,7 @@ nfs41_callback_up(struct svc_serv *serv) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } - dprintk("--> %s return %ld\n", __func__, - IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); + dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); return rqstp; } @@ -208,7 +210,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, struct svc_rqst *rqstp; int (*callback_svc)(void *vrqstp); struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - char svc_name[12]; int ret; nfs_callback_bc_serv(minorversion, xprt, serv); @@ -232,10 +233,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, svc_sock_update_bufs(serv); - sprintf(svc_name, "nfsv4.%u-svc", minorversion); cb_info->serv = serv; cb_info->rqst = rqstp; - cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + cb_info->task = kthread_run(callback_svc, cb_info->rqst, + "nfsv4.%u-svc", minorversion); if (IS_ERR(cb_info->task)) { ret = PTR_ERR(cb_info->task); svc_exit_thread(cb_info->rqst); @@ -279,6 +280,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n ret = nfs4_callback_up_net(serv, net); break; case 1: + case 2: ret = nfs41_callback_up_net(serv, net); break; default: diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index efd54f0a4c4..84326e9fb47 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -32,6 +32,8 @@ enum nfs4_callback_opnum { OP_CB_WANTS_CANCELLED = 12, OP_CB_NOTIFY_LOCK = 13, OP_CB_NOTIFY_DEVICEID = 14, +/* Callback operations new to NFSv4.2 */ + OP_CB_OFFLOAD = 15, OP_CB_ILLEGAL = 10044, }; @@ -39,6 +41,7 @@ struct cb_process_state { __be32 drc_status; struct nfs_client *clp; u32 slotid; + u32 minorversion; struct net *net; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 264d1aa935f..41db5258e7a 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -15,6 +15,7 @@ #include "internal.h" #include "pnfs.h" #include "nfs4session.h" +#include "nfs4trace.h" #ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -93,6 +94,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, default: res = htonl(NFS4ERR_RESOURCE); } + trace_nfs4_recall_delegation(inode, -ntohl(res)); iput(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); @@ -110,7 +112,8 @@ out: * TODO: keep track of all layouts (and delegations) in a hash table * hashed by filehandle. */ -static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) +static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, + struct nfs_fh *fh, nfs4_stateid *stateid) { struct nfs_server *server; struct inode *ino; @@ -118,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid)) + continue; if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) continue; ino = igrab(lo->plh_inode); if (!ino) - continue; + break; spin_lock(&ino->i_lock); /* Is this layout in the process of being freed? */ if (NFS_I(ino)->layout != lo) { spin_unlock(&ino->i_lock); iput(ino); - continue; + break; } pnfs_get_layout_hdr(lo); spin_unlock(&ino->i_lock); @@ -139,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, return NULL; } -static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) +static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, + struct nfs_fh *fh, nfs4_stateid *stateid) { struct pnfs_layout_hdr *lo; spin_lock(&clp->cl_lock); rcu_read_lock(); - lo = get_layout_by_fh_locked(clp, fh); + lo = get_layout_by_fh_locked(clp, fh, stateid); rcu_read_unlock(); spin_unlock(&clp->cl_lock); @@ -160,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp, u32 rv = NFS4ERR_NOMATCHING_LAYOUT; LIST_HEAD(free_me_list); - lo = get_layout_by_fh(clp, &args->cbl_fh); + lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); if (!lo) - return NFS4ERR_NOMATCHING_LAYOUT; + goto out; ino = lo->plh_inode; spin_lock(&ino->i_lock); @@ -177,66 +183,22 @@ static u32 initiate_file_draining(struct nfs_client *clp, pnfs_free_lseg_list(&free_me_list); pnfs_put_layout_hdr(lo); iput(ino); +out: return rv; } static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - struct nfs_server *server; - struct pnfs_layout_hdr *lo; - struct inode *ino; - u32 rv = NFS4ERR_NOMATCHING_LAYOUT; - struct pnfs_layout_hdr *tmp; - LIST_HEAD(recall_list); - LIST_HEAD(free_me_list); - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; - - spin_lock(&clp->cl_lock); - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&server->fsid, &args->cbl_fsid, - sizeof(struct nfs_fsid))) - continue; - - list_for_each_entry(lo, &server->layouts, plh_layouts) { - ino = igrab(lo->plh_inode); - if (!ino) - continue; - spin_lock(&ino->i_lock); - /* Is this layout in the process of being freed? */ - if (NFS_I(ino)->layout != lo) { - spin_unlock(&ino->i_lock); - iput(ino); - continue; - } - pnfs_get_layout_hdr(lo); - spin_unlock(&ino->i_lock); - list_add(&lo->plh_bulk_recall, &recall_list); - } - } - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); + int stat; - list_for_each_entry_safe(lo, tmp, - &recall_list, plh_bulk_recall) { - ino = lo->plh_inode; - spin_lock(&ino->i_lock); - set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) - rv = NFS4ERR_DELAY; - list_del_init(&lo->plh_bulk_recall); - spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&free_me_list); - pnfs_put_layout_hdr(lo); - iput(ino); - } - return rv; + if (args->cbl_recall_type == RETURN_FSID) + stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); + else + stat = pnfs_destroy_layouts_byclid(clp, true); + if (stat != 0) + return NFS4ERR_DELAY; + return NFS4ERR_NOMATCHING_LAYOUT; } static u32 do_callback_layoutrecall(struct nfs_client *clp, @@ -346,14 +308,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) { struct nfs4_slot *slot; - dprintk("%s enter. slotid %d seqid %d\n", + dprintk("%s enter. slotid %u seqid %u\n", __func__, args->csa_slotid, args->csa_sequenceid); if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); slot = tbl->slots + args->csa_slotid; - dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); + dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { @@ -363,7 +325,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { - dprintk("%s seqid %d is a replay\n", + dprintk("%s seqid %u is a replay\n", __func__, args->csa_sequenceid); /* Signal process_op to set this error on next op */ if (args->csa_cachethis == 0) @@ -451,7 +413,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, int i; __be32 status = htonl(NFS4ERR_BADSESSION); - clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); + clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, + &args->csa_sessionid, cps->minorversion); if (clp == NULL) goto out; @@ -459,7 +422,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ - if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { + if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session @@ -506,6 +469,7 @@ out: } else res->csr_status = status; + trace_nfs4_cb_sequence(args, res, status); dprintk("%s: exit with status = %d res->csr_status %d\n", __func__, ntohl(status), ntohl(res->csr_status)); return status; @@ -545,7 +509,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, &args->craa_type_mask)) pnfs_recall_all_layouts(cps->clp); if (flags) - nfs_expire_all_delegation_types(cps->clp, flags); + nfs_expire_unused_delegation_types(cps->clp, flags); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; @@ -562,7 +526,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %u\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_highest_slotid); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 59461c957d9..f4ccfe6521e 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -166,9 +166,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); hdr->minorversion = ntohl(*p++); - /* Check minor version is zero or one. */ - if (hdr->minorversion <= 1) { - hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ + /* Check for minor version support */ + if (hdr->minorversion <= NFS4_MAX_MINOR_VERSION) { + hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 and v4.2 */ } else { pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " "illegal minor version %u!\n", @@ -763,7 +763,7 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * A single slot, so highest used slotid is either 0 or -1 */ tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_session_drain_complete(session, tbl); + nfs4_slot_tbl_drain_complete(tbl); spin_unlock(&tbl->slot_tbl_lock); } @@ -786,6 +786,26 @@ static void nfs4_cb_free_slot(struct cb_process_state *cps) } #endif /* CONFIG_NFS_V4_1 */ +#ifdef CONFIG_NFS_V4_2 +static __be32 +preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + __be32 status = preprocess_nfs41_op(nop, op_nr, op); + if (status != htonl(NFS4ERR_OP_ILLEGAL)) + return status; + + if (op_nr == OP_CB_OFFLOAD) + return htonl(NFS4ERR_NOTSUPP); + return htonl(NFS4ERR_OP_ILLEGAL); +} +#else /* CONFIG_NFS_V4_2 */ +static __be32 +preprocess_nfs42_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); +} +#endif /* CONFIG_NFS_V4_2 */ + static __be32 preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) { @@ -801,8 +821,7 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) return htonl(NFS_OK); } -static __be32 process_op(uint32_t minorversion, int nop, - struct svc_rqst *rqstp, +static __be32 process_op(int nop, struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp, struct cb_process_state *cps) @@ -819,10 +838,22 @@ static __be32 process_op(uint32_t minorversion, int nop, return status; dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", - __func__, minorversion, nop, op_nr); + __func__, cps->minorversion, nop, op_nr); + + switch (cps->minorversion) { + case 0: + status = preprocess_nfs4_op(op_nr, &op); + break; + case 1: + status = preprocess_nfs41_op(nop, op_nr, &op); + break; + case 2: + status = preprocess_nfs42_op(nop, op_nr, &op); + break; + default: + status = htonl(NFS4ERR_MINOR_VERS_MISMATCH); + } - status = minorversion ? preprocess_nfs41_op(nop, op_nr, &op) : - preprocess_nfs4_op(op_nr, &op); if (status == htonl(NFS4ERR_OP_ILLEGAL)) op_nr = OP_CB_ILLEGAL; if (status) @@ -885,14 +916,15 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_drop_reply; } + cps.minorversion = hdr_arg.minorversion; hdr_res.taglen = hdr_arg.taglen; hdr_res.tag = hdr_arg.tag; if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) return rpc_system_err; while (status == 0 && nops != hdr_arg.nops) { - status = process_op(hdr_arg.minorversion, nops, rqstp, - &xdr_in, argp, &xdr_out, resp, &cps); + status = process_op(nops, rqstp, &xdr_in, + argp, &xdr_out, resp, &cps); nops++; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9f3c66438d0..1d09289c8f0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -197,7 +197,6 @@ error_0: EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) -/* idr_remove_all is not needed as all id's are removed by nfs_put_client */ void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); @@ -502,8 +501,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; - return rpc_ops->init_client(new, timeparms, ip_addr, - authflavour); + return rpc_ops->init_client(new, timeparms, ip_addr); } spin_unlock(&nn->nfs_client_lock); @@ -592,8 +590,12 @@ int nfs_create_rpc_client(struct nfs_client *clp, if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_DISCRTRY; + if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_INFINITE_SLOTS; if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -693,13 +695,12 @@ EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); * @clp: nfs_client to initialise * @timeparms: timeout parameters for underlying RPC transport * @ip_addr: IP presentation address (not used) - * @authflavor: authentication flavor for underlying RPC transport * * Returns pointer to an NFS client, or an ERR_PTR value. */ struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour) + const char *ip_addr) { int error; @@ -752,8 +753,6 @@ static int nfs_init_server(struct nfs_server *server, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - if (server->options & NFS_OPTION_MIGRATION) - set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); @@ -787,8 +786,10 @@ static int nfs_init_server(struct nfs_server *server, goto error; server->port = data->nfs_server.port; + server->auth_info = data->auth_info; - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, + data->selected_flavor); if (error < 0) goto error; @@ -929,6 +930,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acdirmax = source->acdirmax; target->caps = source->caps; target->options = source->options; + target->auth_info = source->auth_info; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); @@ -946,7 +948,7 @@ void nfs_server_insert_lists(struct nfs_server *server) } EXPORT_SYMBOL_GPL(nfs_server_insert_lists); -static void nfs_server_remove_lists(struct nfs_server *server) +void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn; @@ -963,6 +965,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nfs_server_remove_lists); /* * Allocate and initialise a server record @@ -1075,7 +1078,7 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr); + error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 81c5eec3cf3..5d8ccecf5f5 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -20,6 +20,7 @@ #include "nfs4_fs.h" #include "delegation.h" #include "internal.h" +#include "nfs4trace.h" static void nfs_free_delegation(struct nfs_delegation *delegation) { @@ -55,7 +56,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL && (delegation->type & flags) == flags) { + if (delegation != NULL && (delegation->type & flags) == flags && + !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { nfs_mark_delegation_referenced(delegation); ret = 1; } @@ -63,7 +65,7 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) return ret; } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) +static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -72,20 +74,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ if (inode->i_flock == NULL) goto out; - /* Protect inode->i_flock using the file locks lock */ - lock_flocks(); + /* Protect inode->i_flock using the i_lock */ + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file) != ctx) continue; - unlock_flocks(); - status = nfs4_lock_delegation_recall(state, fl); + spin_unlock(&inode->i_lock); + status = nfs4_lock_delegation_recall(fl, state, stateid); if (status < 0) goto out; - lock_flocks(); + spin_lock(&inode->i_lock); } - unlock_flocks(); + spin_unlock(&inode->i_lock); out: return status; } @@ -94,7 +96,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; + struct nfs4_state_owner *sp; struct nfs4_state *state; + unsigned int seq; int err; again: @@ -109,9 +113,16 @@ again: continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); + sp = state->owner; + /* Block nfs4_proc_unlck */ + mutex_lock(&sp->so_delegreturn_mutex); + seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid); - if (err >= 0) - err = nfs_delegation_claim_locks(ctx, state); + if (!err) + err = nfs_delegation_claim_locks(ctx, state, stateid); + if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) + err = -EAGAIN; + mutex_unlock(&sp->so_delegreturn_mutex); put_nfs_open_context(ctx); if (err != 0) return err; @@ -150,6 +161,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, spin_unlock(&delegation->lock); put_rpccred(oldcred); rcu_read_unlock(); + trace_nfs4_reclaim_delegation(inode, res->delegation_type); } else { /* We appear to have raced with a delegation return. */ spin_unlock(&delegation->lock); @@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation } static struct nfs_delegation * +nfs_start_delegation_return_locked(struct nfs_inode *nfsi) +{ + struct nfs_delegation *ret = NULL; + struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + + if (delegation == NULL) + goto out; + spin_lock(&delegation->lock); + if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + ret = delegation; + spin_unlock(&delegation->lock); +out: + return ret; +} + +static struct nfs_delegation * +nfs_start_delegation_return(struct nfs_inode *nfsi) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + delegation = nfs_start_delegation_return_locked(nfsi); + rcu_read_unlock(); + return delegation; +} + +static void +nfs_abort_delegation_return(struct nfs_delegation *delegation, + struct nfs_client *clp) +{ + + spin_lock(&delegation->lock); + clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + spin_unlock(&delegation->lock); + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); +} + +static struct nfs_delegation * nfs_detach_delegation_locked(struct nfs_inode *nfsi, - struct nfs_server *server) + struct nfs_delegation *delegation, + struct nfs_client *clp) { - struct nfs_delegation *delegation = + struct nfs_delegation *deleg_cur = rcu_dereference_protected(nfsi->delegation, - lockdep_is_held(&server->nfs_client->cl_lock)); + lockdep_is_held(&clp->cl_lock)); - if (delegation == NULL) - goto nomatch; + if (deleg_cur == NULL || delegation != deleg_cur) + return NULL; spin_lock(&delegation->lock); + set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); list_del_rcu(&delegation->super_list); delegation->inode = NULL; nfsi->delegation_state = 0; rcu_assign_pointer(nfsi->delegation, NULL); spin_unlock(&delegation->lock); return delegation; -nomatch: - return NULL; } static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, - struct nfs_server *server) + struct nfs_delegation *delegation, + struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_delegation *delegation; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, server); + delegation = nfs_detach_delegation_locked(nfsi, delegation, clp); spin_unlock(&clp->cl_lock); return delegation; } +static struct nfs_delegation * +nfs_inode_detach_delegation(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_delegation *delegation; + + delegation = nfs_start_delegation_return(nfsi); + if (delegation == NULL) + return NULL; + return nfs_detach_delegation(nfsi, delegation, server); +} + /** * nfs_inode_set_delegation - set up a delegation on an inode * @inode: inode to which delegation applies @@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, server); + freeme = nfs_detach_delegation_locked(nfsi, + old_delegation, clp); + if (freeme == NULL) + goto out; } list_add_rcu(&delegation->super_list, &server->delegations); nfsi->delegation_state = delegation->type; @@ -279,6 +346,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_REVAL_FORCED; spin_unlock(&inode->i_lock); + trace_nfs4_set_delegation(inode, res->delegation_type); out: spin_unlock(&clp->cl_lock); @@ -292,19 +360,29 @@ out: /* * Basic procedure for returning a delegation to the server */ -static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) +static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync) { + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); int err; - /* - * Guard against new delegated open/lock/unlock calls and against - * state recovery - */ - down_write(&nfsi->rwsem); - err = nfs_delegation_claim_opens(inode, &delegation->stateid); - up_write(&nfsi->rwsem); - if (err) + if (delegation == NULL) + return 0; + do { + err = nfs_delegation_claim_opens(inode, &delegation->stateid); + if (!issync || err != -EAGAIN) + break; + /* + * Guard against state recovery + */ + err = nfs4_wait_clnt_recover(clp); + } while (err == 0); + + if (err) { + nfs_abort_delegation_return(delegation, clp); + goto out; + } + if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode))) goto out; err = nfs_do_return_delegation(inode, delegation, issync); @@ -312,6 +390,24 @@ out: return err; } +static bool nfs_delegation_need_return(struct nfs_delegation *delegation) +{ + bool ret = false; + + if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) + ret = true; + if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + struct inode *inode; + + spin_lock(&delegation->lock); + inode = delegation->inode; + if (inode && list_empty(&NFS_I(inode)->open_files)) + ret = true; + spin_unlock(&delegation->lock); + } + return ret; +} + /** * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process @@ -334,19 +430,15 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (!test_and_clear_bit(NFS_DELEGATION_RETURN, - &delegation->flags)) + if (!nfs_delegation_need_return(delegation)) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) continue; - delegation = nfs_detach_delegation(NFS_I(inode), - server); + delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); - if (delegation != NULL) - err = __nfs_inode_return_delegation(inode, - delegation, 0); + err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); if (!err) goto restart; @@ -367,15 +459,11 @@ restart: */ void nfs_inode_return_delegation_noreclaim(struct inode *inode) { - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - if (rcu_access_pointer(nfsi->delegation) != NULL) { - delegation = nfs_detach_delegation(nfsi, server); - if (delegation != NULL) - nfs_do_return_delegation(inode, delegation, 0); - } + delegation = nfs_inode_detach_delegation(inode); + if (delegation != NULL) + nfs_do_return_delegation(inode, delegation, 0); } /** @@ -390,21 +478,24 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) */ int nfs4_inode_return_delegation(struct inode *inode) { - struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; int err = 0; nfs_wb_all(inode); - if (rcu_access_pointer(nfsi->delegation) != NULL) { - delegation = nfs_detach_delegation(nfsi, server); - if (delegation != NULL) { - err = __nfs_inode_return_delegation(inode, delegation, 1); - } - } + delegation = nfs_start_delegation_return(nfsi); + if (delegation != NULL) + err = nfs_end_delegation_return(inode, delegation, 1); return err; } +static void nfs_mark_return_if_closed_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); +} + static void nfs_mark_return_delegation(struct nfs_server *server, struct nfs_delegation *delegation) { @@ -412,6 +503,45 @@ static void nfs_mark_return_delegation(struct nfs_server *server, set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } +static bool nfs_server_mark_return_all_delegations(struct nfs_server *server) +{ + struct nfs_delegation *delegation; + bool ret = false; + + list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + nfs_mark_return_delegation(server, delegation); + ret = true; + } + return ret; +} + +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +{ + struct nfs_server *server; + + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + nfs_server_mark_return_all_delegations(server); + rcu_read_unlock(); +} + +static void nfs_delegation_run_state_manager(struct nfs_client *clp) +{ + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); +} + +/** + * nfs_expire_all_delegations + * @clp: client to process + * + */ +void nfs_expire_all_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_all_delegations(clp); + nfs_delegation_run_state_manager(clp); +} + /** * nfs_super_return_all_delegations - return delegations for one superblock * @sb: sb to process @@ -420,24 +550,22 @@ static void nfs_mark_return_delegation(struct nfs_server *server, void nfs_server_return_all_delegations(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; - struct nfs_delegation *delegation; + bool need_wait; if (clp == NULL) return; rcu_read_lock(); - list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - spin_lock(&delegation->lock); - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - spin_unlock(&delegation->lock); - } + need_wait = nfs_server_mark_return_all_delegations(server); rcu_read_unlock(); - if (nfs_client_return_marked_delegations(clp) != 0) + if (need_wait) { nfs4_schedule_state_manager(clp); + nfs4_wait_clnt_recover(clp); + } } -static void nfs_mark_return_all_delegation_types(struct nfs_server *server, +static void nfs_mark_return_unused_delegation_types(struct nfs_server *server, fmode_t flags) { struct nfs_delegation *delegation; @@ -446,32 +574,26 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } -static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, +static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { struct nfs_server *server; rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - nfs_mark_return_all_delegation_types(server, flags); + nfs_mark_return_unused_delegation_types(server, flags); rcu_read_unlock(); } -static void nfs_delegation_run_state_manager(struct nfs_client *clp) -{ - if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) - nfs4_schedule_state_manager(clp); -} - void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; - delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); + delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); nfs_free_delegation(delegation); @@ -480,27 +602,17 @@ void nfs_remove_bad_delegation(struct inode *inode) EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); /** - * nfs_expire_all_delegation_types + * nfs_expire_unused_delegation_types * @clp: client to process * @flags: delegation types to expire * */ -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags) { - nfs_client_mark_return_all_delegation_types(clp, flags); + nfs_client_mark_return_unused_delegation_types(clp, flags); nfs_delegation_run_state_manager(clp); } -/** - * nfs_expire_all_delegations - * @clp: client to process - * - */ -void nfs_expire_all_delegations(struct nfs_client *clp) -{ - nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); -} - static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -508,7 +620,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(server, delegation); + nfs_mark_return_if_closed_delegation(server, delegation); } } @@ -547,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * @@ -649,7 +764,7 @@ restart: if (inode == NULL) continue; delegation = nfs_detach_delegation(NFS_I(inode), - server); + delegation, server); rcu_read_unlock(); if (delegation != NULL) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index bbc6a4dba0d..9a79c7a99d6 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -28,7 +28,9 @@ struct nfs_delegation { enum { NFS_DELEGATION_NEED_RECLAIM = 0, NFS_DELEGATION_RETURN, + NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, + NFS_DELEGATION_RETURNING, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -40,7 +42,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_server_return_all_delegations(struct nfs_server *); void nfs_expire_all_delegations(struct nfs_client *clp); -void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); +void nfs_expire_unused_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); @@ -52,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1b2d7eb9379..4a3d4ef7612 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -33,6 +33,7 @@ #include <linux/pagevec.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/swap.h> #include <linux/sched.h> #include <linux/kmemleak.h> #include <linux/xattr.h> @@ -42,11 +43,13 @@ #include "internal.h" #include "fscache.h" +#include "nfstrace.h" + /* #define NFS_DEBUG_VERBOSE 1 */ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); -static int nfs_readdir(struct file *, void *, filldir_t); +static int nfs_readdir(struct file *, struct dir_context *); static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); static loff_t nfs_llseek_dir(struct file *, loff_t, int); static void nfs_readdir_clear_array(struct page*); @@ -54,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .readdir = nfs_readdir, + .iterate = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, @@ -66,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = { static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) { + struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->duped = 0; - ctx->attr_gencount = NFS_I(dir)->attr_gencount; + ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; ctx->cred = get_rpccred(cred); + spin_lock(&dir->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&dir->i_lock); return ctx; } return ERR_PTR(-ENOMEM); } -static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) +static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) { + spin_lock(&dir->i_lock); + list_del(&ctx->list); + spin_unlock(&dir->i_lock); put_rpccred(ctx->cred); kfree(ctx); } @@ -95,9 +105,7 @@ nfs_opendir(struct inode *inode, struct file *filp) struct nfs_open_dir_context *ctx; struct rpc_cred *cred; - dfprintk(FILE, "NFS: open dir(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); @@ -125,7 +133,7 @@ out: static int nfs_closedir(struct inode *inode, struct file *filp) { - put_nfs_open_dir_context(filp->private_data); + put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data); return 0; } @@ -147,6 +155,7 @@ typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); typedef struct { struct file *file; struct page *page; + struct dir_context *ctx; unsigned long page_index; u64 *dir_cookie; u64 last_cookie; @@ -252,7 +261,7 @@ out: static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) { - loff_t diff = desc->file->f_pos - desc->current_index; + loff_t diff = desc->ctx->pos - desc->current_index; unsigned int index; if (diff < 0) @@ -272,6 +281,15 @@ out_eof: return -EBADCOOKIE; } +static bool +nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) +{ + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + return false; + smp_rmb(); + return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); +} + static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) { @@ -281,25 +299,23 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des for (i = 0; i < array->size; i++) { if (array->array[i].cookie == *desc->dir_cookie) { - struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); + struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); struct nfs_open_dir_context *ctx = desc->file->private_data; new_pos = desc->current_index + i; - if (ctx->attr_gencount != nfsi->attr_gencount - || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { + if (ctx->attr_gencount != nfsi->attr_gencount || + !nfs_readdir_inode_mapping_valid(nfsi)) { ctx->duped = 0; ctx->attr_gencount = nfsi->attr_gencount; - } else if (new_pos < desc->file->f_pos) { + } else if (new_pos < desc->ctx->pos) { if (ctx->duped > 0 && ctx->dup_cookie == *desc->dir_cookie) { if (printk_ratelimit()) { - pr_notice("NFS: directory %s/%s contains a readdir loop." + pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " - "The file: %s has duplicate cookie %llu\n", - desc->file->f_dentry->d_parent->d_name.name, - desc->file->f_dentry->d_name.name, - array->array[i].string.name, - *desc->dir_cookie); + "The file: %.*s has duplicate cookie %llu\n", + desc->file, array->array[i].string.len, + array->array[i].string.name, *desc->dir_cookie); } status = -ELOOP; goto out; @@ -307,7 +323,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des ctx->dup_cookie = *desc->dir_cookie; ctx->duped = -1; } - desc->file->f_pos = new_pos; + desc->ctx->pos = new_pos; desc->cache_entry_index = i; return 0; } @@ -405,13 +421,13 @@ different: } static -bool nfs_use_readdirplus(struct inode *dir, struct file *filp) +bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) { if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) return false; if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) return true; - if (filp->f_pos == 0) + if (ctx->pos == 0) return true; return false; } @@ -427,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); } +/* + * This function is mainly for use by nfs_getattr(). + * + * If this is an 'ls -l', we want to force use of readdirplus. + * Do this by checking if there is an active file descriptor + * and calling nfs_advise_use_readdirplus, then forcing a + * cache flush. + */ +void nfs_force_use_readdirplus(struct inode *dir) +{ + if (!list_empty(&NFS_I(dir)->open_files)) { + nfs_advise_use_readdirplus(dir); + nfs_zap_mapping(dir, dir->i_mapping); + } +} + static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { @@ -435,6 +467,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) struct dentry *alias; struct inode *dir = parent->d_inode; struct inode *inode; + int status; if (filename.name[0] == '.') { if (filename.len == 1) @@ -447,7 +480,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) dentry = d_lookup(parent, &filename); if (dentry != NULL) { if (nfs_same_file(dentry, entry)) { - nfs_refresh_inode(dentry->d_inode, entry->fattr); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + status = nfs_refresh_inode(dentry->d_inode, entry->fattr); + if (!status) + nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); goto out; } else { if (d_invalidate(dentry) != 0) @@ -460,7 +496,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) if (dentry == NULL) return; - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); + inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); if (IS_ERR(inode)) goto out; @@ -585,10 +621,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, if (entry.fh == NULL || entry.fattr == NULL) goto out; + entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); + if (IS_ERR(entry.label)) { + status = PTR_ERR(entry.label); + goto out; + } + array = nfs_readdir_get_array(page); if (IS_ERR(array)) { status = PTR_ERR(array); - goto out; + goto out_label_free; } memset(array, 0, sizeof(struct nfs_cache_array)); array->eof_index = -1; @@ -614,6 +656,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, nfs_readdir_free_large_page(pages_ptr, pages, array_size); out_release_array: nfs_readdir_release_array(page); +out_label_free: + nfs4_label_free(entry.label); out: nfs_free_fattr(entry.fattr); nfs_free_fhandle(entry.fh); @@ -629,7 +673,7 @@ out: static int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) { - struct inode *inode = desc->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(desc->file); int ret; ret = nfs_readdir_xdr_to_array(desc, page, inode); @@ -660,7 +704,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, + return read_cache_page(file_inode(desc->file)->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); } @@ -702,8 +746,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) * Once we've found the start of the dirent within a page: fill 'er up... */ static -int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, - filldir_t filldir) +int nfs_do_filldir(nfs_readdir_descriptor_t *desc) { struct file *file = desc->file; int i = 0; @@ -721,13 +764,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct nfs_cache_array_entry *ent; ent = &array->array[i]; - if (filldir(dirent, ent->string.name, ent->string.len, - file->f_pos, nfs_compat_user_ino64(ent->ino), - ent->d_type) < 0) { + if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, + nfs_compat_user_ino64(ent->ino), ent->d_type)) { desc->eof = 1; break; } - file->f_pos++; + desc->ctx->pos++; if (i < (array->size-1)) *desc->dir_cookie = array->array[i+1].cookie; else @@ -759,12 +801,11 @@ out: * directory in the page cache by the time we get here. */ static inline -int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, - filldir_t filldir) +int uncached_readdir(nfs_readdir_descriptor_t *desc) { struct page *page = NULL; int status; - struct inode *inode = desc->file->f_path.dentry->d_inode; + struct inode *inode = file_inode(desc->file); struct nfs_open_dir_context *ctx = desc->file->private_data; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", @@ -785,7 +826,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, if (status < 0) goto out_release; - status = nfs_do_filldir(desc, dirent, filldir); + status = nfs_do_filldir(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", @@ -796,39 +837,51 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } +static bool nfs_dir_mapping_need_revalidate(struct inode *dir) +{ + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_attribute_cache_expired(dir)) + return true; + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + return true; + return false; +} + /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. */ -static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +static int nfs_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = filp->f_path.dentry; + struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; - struct nfs_open_dir_context *dir_ctx = filp->private_data; - int res; + struct nfs_open_dir_context *dir_ctx = file->private_data; + int res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (long long)filp->f_pos); + dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", + file, (long long)ctx->pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); /* - * filp->f_pos points to the dirent entry number. + * ctx->pos points to the dirent entry number. * *desc->dir_cookie has the cookie for the next entry. We have * to either find the entry with the appropriate number or * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); - desc->file = filp; + desc->file = file; + desc->ctx = ctx; desc->dir_cookie = &dir_ctx->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; - desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0; + desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping(inode, filp->f_mapping); + if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) + res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; @@ -840,7 +893,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* This means either end of directory */ if (*desc->dir_cookie && desc->eof == 0) { /* Or that the server has 'lost' a cookie */ - res = uncached_readdir(desc, dirent, filldir); + res = uncached_readdir(desc); if (res == 0) continue; } @@ -857,7 +910,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (res < 0) break; - res = nfs_do_filldir(desc, dirent, filldir); + res = nfs_do_filldir(desc); if (res < 0) break; } while (!desc->eof); @@ -865,22 +918,17 @@ out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; - dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - res); + dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; - dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name, - offset, whence); + dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", + filp, offset, whence); mutex_lock(&inode->i_mutex); switch (whence) { @@ -910,15 +958,12 @@ out: static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); - dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); mutex_lock(&inode->i_mutex); - nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); mutex_unlock(&inode->i_mutex); return 0; } @@ -1040,6 +1085,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; + struct nfs4_label *label = NULL; int error; if (flags & LOOKUP_RCU) @@ -1057,9 +1103,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); goto out_bad; } @@ -1082,7 +1127,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (fhandle == NULL || fattr == NULL) goto out_error; - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); + if (IS_ERR(label)) + goto out_error; + + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); if (error) goto out_bad; if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1090,8 +1141,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if ((error = nfs_refresh_inode(inode, fattr)) != 0) goto out_bad; + nfs_setsecurity(inode, fattr, label); + nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); + nfs4_label_free(label); + out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: @@ -1099,50 +1154,90 @@ out_set_verifier: nfs_advise_use_readdirplus(dir); out_valid_noent: dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", + __func__, dentry); return 1; out_zap_parent: nfs_zap_caches(dir); out_bad: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); + nfs4_label_free(label); nfs_mark_for_revalidate(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); - /* If we have submounts, don't unhash ! */ - if (have_submounts(dentry)) - goto out_valid; - if (dentry->d_flags & DCACHE_DISCONNECTED) + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (IS_ROOT(dentry)) goto out_valid; - shrink_dcache_parent(dentry); } - d_drop(dentry); + /* If we have submounts, don't unhash ! */ + if (check_submounts_and_drop(dentry) != 0) + goto out_valid; + dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", + __func__, dentry); return 0; out_error: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); + nfs4_label_free(label); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", - __func__, dentry->d_parent->d_name.name, - dentry->d_name.name, error); + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", + __func__, dentry, error); return error; } /* + * A weaker form of d_revalidate for revalidating just the dentry->d_inode + * when we don't really care about the dentry name. This is called when a + * pathwalk ends on a dentry that was not found via a normal lookup in the + * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals). + * + * In this situation, we just want to verify that the inode itself is OK + * since the dentry might have changed on the server. + */ +static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) +{ + int error; + struct inode *inode = dentry->d_inode; + + /* + * I believe we can only get a negative dentry here in the case of a + * procfs-style symlink. Just assume it's correct for now, but we may + * eventually need to do something more here. + */ + if (!inode) { + dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n", + __func__, dentry); + return 1; + } + + if (is_bad_inode(inode)) { + dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", + __func__, dentry); + return 0; + } + + error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", + __func__, inode->i_ino, error ? "invalid" : "valid"); + return !error; +} + +/* * This is called from dput() when d_count is going to 0. */ static int nfs_dentry_delete(const struct dentry *dentry) { - dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - dentry->d_flags); + dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n", + dentry, dentry->d_flags); /* Unhash any dentry with a stale inode */ if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) @@ -1202,6 +1297,7 @@ static void nfs_d_release(struct dentry *dentry) const struct dentry_operations nfs_dentry_operations = { .d_revalidate = nfs_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -1216,10 +1312,10 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct inode *inode = NULL; struct nfs_fh *fhandle = NULL; struct nfs_fattr *fattr = NULL; + struct nfs4_label *label = NULL; int error; - dfprintk(VFS, "NFS: lookup(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); @@ -1242,17 +1338,22 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (fhandle == NULL || fattr == NULL) goto out; + label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT); + if (IS_ERR(label)) + goto out; + parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ + trace_nfs_lookup_enter(dir, dentry, flags); nfs_block_sillyrename(parent); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; if (error < 0) { res = ERR_PTR(error); goto out_unblock_sillyrename; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); res = ERR_CAST(inode); if (IS_ERR(res)) goto out_unblock_sillyrename; @@ -1270,6 +1371,8 @@ no_entry: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: nfs_unblock_sillyrename(parent); + trace_nfs_lookup_exit(dir, dentry, flags, error); + nfs4_label_free(label); out: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); @@ -1306,7 +1409,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i static int do_open(struct inode *inode, struct file *filp) { - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); return 0; } @@ -1317,17 +1420,8 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; - if (ctx->dentry != dentry) { - dput(ctx->dentry); - ctx->dentry = dget(dentry); - } - - /* If the open_intent is for execute, we have an extra check to make */ - if (ctx->mode & FMODE_EXEC) { - err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); - if (err < 0) - goto out; - } + if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + *opened |= FILE_CREATED; err = finish_open(file, dentry, do_open, opened); if (err) @@ -1335,7 +1429,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, nfs_file_set_open_context(file, ctx); out: - put_nfs_open_context(ctx); return err; } @@ -1347,13 +1440,18 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; + unsigned int lookup_flags = 0; int err; /* Expect a negative dentry */ BUG_ON(dentry->d_inode); - dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); + + err = nfs_check_flags(open_flags); + if (err) + return err; /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { @@ -1365,6 +1463,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, */ return -ENOENT; } + lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY; goto no_open; } @@ -1385,15 +1484,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, if (IS_ERR(ctx)) goto out; + trace_nfs_atomic_open_enter(dir, ctx, open_flags); nfs_block_sillyrename(dentry->d_parent); - inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); - d_drop(dentry); + inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); + nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { - nfs_unblock_sillyrename(dentry->d_parent); - put_nfs_open_context(ctx); err = PTR_ERR(inode); + trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); + put_nfs_open_context(ctx); switch (err) { case -ENOENT: + d_drop(dentry); d_add(dentry, NULL); break; case -EISDIR: @@ -1409,21 +1510,15 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, } goto out; } - res = d_add_unique(dentry, inode); - if (res != NULL) - dentry = res; - - nfs_unblock_sillyrename(dentry->d_parent); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - - err = nfs_finish_open(ctx, dentry, file, open_flags, opened); - dput(res); + err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened); + trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); + put_nfs_open_context(ctx); out: return err; no_open: - res = nfs_lookup(dir, dentry, 0); + res = nfs_lookup(dir, dentry, lookup_flags); err = PTR_ERR(res); if (IS_ERR(res)) goto out; @@ -1446,6 +1541,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; if (d_mountpoint(dentry)) goto no_open; + if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) + goto no_open; inode = dentry->d_inode; parent = dget_parent(dentry); @@ -1486,7 +1583,8 @@ no_open: * Code common to create, mkdir, and mknod. */ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, + struct nfs4_label *label) { struct dentry *parent = dget_parent(dentry); struct inode *dir = parent->d_inode; @@ -1499,18 +1597,18 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (dentry->d_inode) goto out; if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); if (error) goto out_error; } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); - error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); + error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr, NULL); if (error < 0) goto out_error; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr); + inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label); error = PTR_ERR(inode); if (IS_ERR(inode)) goto out_error; @@ -1538,13 +1636,15 @@ int nfs_create(struct inode *dir, struct dentry *dentry, int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: create(%s/%lu), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + trace_nfs_create_enter(dir, dentry, open_flags); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + trace_nfs_create_exit(dir, dentry, open_flags, error); if (error != 0) goto out_err; return 0; @@ -1563,8 +1663,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1572,7 +1672,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; + trace_nfs_mknod_enter(dir, dentry); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); + trace_nfs_mknod_exit(dir, dentry, status); if (status != 0) goto out_err; return 0; @@ -1590,13 +1692,15 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; + trace_nfs_mkdir_enter(dir, dentry); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + trace_nfs_mkdir_exit(dir, dentry, error); if (error != 0) goto out_err; return 0; @@ -1616,15 +1720,24 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n", + dir->i_sb->s_id, dir->i_ino, dentry); - error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - /* Ensure the VFS deletes this inode */ - if (error == 0 && dentry->d_inode != NULL) - clear_nlink(dentry->d_inode); - else if (error == -ENOENT) - nfs_dentry_handle_enoent(dentry); + trace_nfs_rmdir_enter(dir, dentry); + if (dentry->d_inode) { + nfs_wait_on_sillyrename(dentry); + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + /* Ensure the VFS deletes this inode */ + switch (error) { + case 0: + clear_nlink(dentry->d_inode); + break; + case -ENOENT: + nfs_dentry_handle_enoent(dentry); + } + } else + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); + trace_nfs_rmdir_exit(dir, dentry, error); return error; } @@ -1643,8 +1756,7 @@ static int nfs_safe_remove(struct dentry *dentry) struct inode *inode = dentry->d_inode; int error = -EBUSY; - dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry); /* If the dentry was sillyrenamed, we simply call d_delete() */ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { @@ -1652,6 +1764,7 @@ static int nfs_safe_remove(struct dentry *dentry) goto out; } + trace_nfs_remove_enter(dir, dentry); if (inode != NULL) { NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); @@ -1661,6 +1774,7 @@ static int nfs_safe_remove(struct dentry *dentry) error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) nfs_dentry_handle_enoent(dentry); + trace_nfs_remove_exit(dir, dentry, error); out: return error; } @@ -1675,16 +1789,17 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) int error; int need_rehash = 0; - dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name); + dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id, + dir->i_ino, dentry); + trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); - if (dentry->d_count > 1) { + if (d_count(dentry) > 1) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); - return error; + goto out; } if (!d_unhashed(dentry)) { __d_drop(dentry); @@ -1696,6 +1811,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); +out: + trace_nfs_unlink_exit(dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_unlink); @@ -1717,15 +1834,14 @@ EXPORT_SYMBOL_GPL(nfs_unlink); */ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct pagevec lru_pvec; struct page *page; char *kaddr; struct iattr attr; unsigned int pathlen = strlen(symname); int error; - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry->d_name.name, symname); + dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry, symname); if (pathlen > PAGE_SIZE) return -ENAMETOOLONG; @@ -1743,11 +1859,13 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr); + trace_nfs_symlink_enter(dir, dentry); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { - dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", + dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, - dentry->d_name.name, symname, error); + dentry, symname, error); d_drop(dentry); __free_page(page); return error; @@ -1757,13 +1875,15 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * No big deal if we can't add this page to the page cache here. * READLINK will get the missing page from the server if needed. */ - pagevec_init(&lru_pvec, 0); - if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, + if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0, GFP_KERNEL)) { - pagevec_add(&lru_pvec, page); - pagevec_lru_add_file(&lru_pvec); SetPageUptodate(page); unlock_page(page); + /* + * add_to_page_cache_lru() grabs an extra page refcount. + * Drop it here to avoid leaking this page later. + */ + page_cache_release(page); } else __free_page(page); @@ -1777,10 +1897,10 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) struct inode *inode = old_dentry->d_inode; int error; - dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - dentry->d_parent->d_name.name, dentry->d_name.name); + dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n", + old_dentry, dentry); + trace_nfs_link_enter(inode, dir, dentry); NFS_PROTO(inode)->return_delegation(inode); d_drop(dentry); @@ -1789,6 +1909,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) ihold(inode); d_add(dentry, inode); } + trace_nfs_link_exit(inode, dir, dentry, error); return error; } EXPORT_SYMBOL_GPL(nfs_link); @@ -1823,13 +1944,14 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct dentry *dentry = NULL, *rehash = NULL; + struct rpc_task *task; int error = -EBUSY; - dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", - old_dentry->d_parent->d_name.name, old_dentry->d_name.name, - new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - new_dentry->d_count); + dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", + old_dentry, new_dentry, + d_count(new_dentry)); + trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); /* * For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. If the @@ -1846,7 +1968,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, rehash = new_dentry; } - if (new_dentry->d_count > 2) { + if (d_count(new_dentry) > 2) { int err; /* copy the target dentry's name */ @@ -1870,12 +1992,22 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) NFS_PROTO(new_inode)->return_delegation(new_inode); - error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, - new_dir, &new_dentry->d_name); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); + if (IS_ERR(task)) { + error = PTR_ERR(task); + goto out; + } + + error = rpc_wait_for_completion_task(task); + if (error == 0) + error = task->tk_status; + rpc_put_task(task); nfs_mark_for_revalidate(old_inode); out: if (rehash) d_rehash(rehash); + trace_nfs_rename_exit(old_dir, old_dentry, + new_dir, new_dentry, error); if (!error) { if (new_inode != NULL) nfs_drop_nlink(new_inode); @@ -1900,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); kfree(entry); - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); - smp_mb__after_atomic_dec(); + smp_mb__after_atomic(); } static void nfs_access_free_list(struct list_head *head) @@ -1916,17 +2048,18 @@ static void nfs_access_free_list(struct list_head *head) } } -int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc) +unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; int nr_to_scan = sc->nr_to_scan; gfp_t gfp_mask = sc->gfp_mask; + long freed = 0; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 0 : -1; + return SHRINK_STOP; spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { @@ -1942,21 +2075,28 @@ int nfs_access_cache_shrinker(struct shrinker *shrink, struct nfs_access_entry, lru); list_move(&cache->lru, &head); rb_erase(&cache->rb_node, &nfsi->access_cache); + freed++; if (!list_empty(&nfsi->access_cache_entry_lru)) list_move_tail(&nfsi->access_cache_inode_lru, &nfs_access_lru_list); else { remove_lru_entry: list_del_init(&nfsi->access_cache_inode_lru); - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } spin_unlock(&inode->i_lock); } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); - return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; + return freed; +} + +unsigned long +nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) @@ -2092,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) nfs_access_add_rbtree(inode, cache); /* Update accounting */ - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_long_inc(&nfs_access_nr_entries); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); /* Add inode to global LRU list */ if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { @@ -2125,9 +2265,11 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) struct nfs_access_entry cache; int status; + trace_nfs_access_enter(inode); + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) - goto out; + goto out_cached; /* Be clever: ask server to check for all possible rights */ cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; @@ -2140,13 +2282,15 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } - return status; + goto out; } nfs_access_add_cache(inode, &cache); +out_cached: + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + status = -EACCES; out: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) - return 0; - return -EACCES; + trace_nfs_access_exit(inode, status); + return status; } static int nfs_open_permission_mask(int openflags) @@ -2192,11 +2336,6 @@ int nfs_permission(struct inode *inode, int mask) case S_IFLNK: goto out; case S_IFREG: - /* NFSv4 has atomic_open... */ - if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && (mask & MAY_OPEN) - && !(mask & MAY_EXEC)) - goto out; break; case S_IFDIR: /* @@ -2221,7 +2360,7 @@ out: if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) res = -EACCES; - dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", + dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0bd7a55a5f0..f11b9eed0de 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +/* + * nfs_direct_select_verf - select the right verifier + * @dreq - direct request possibly spanning multiple servers + * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs + * @ds_idx - index of data server in data server list, only valid if ds_clp set + * + * returns the correct verifier to use given the role of the server + */ +static struct nfs_writeverf * +nfs_direct_select_verf(struct nfs_direct_req *dreq, + struct nfs_client *ds_clp, + int ds_idx) +{ + struct nfs_writeverf *verfp = &dreq->verf; + +#ifdef CONFIG_NFS_V4_1 + if (ds_clp) { + /* pNFS is in use, use the DS verf */ + if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) + verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; + else + WARN_ON_ONCE(1); + } +#endif + return verfp; +} + + +/* + * nfs_direct_set_hdr_verf - set the write/commit verifier + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verfs + * + * Set the server's (MDS or DS) "seen" verifier + */ +static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + WARN_ON_ONCE(verfp->committed >= 0); + memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); + WARN_ON_ONCE(verfp->committed < 0); +} + +/* + * nfs_direct_cmp_hdr_verf - compare verifier for pgio header + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verf + * + * set the server's "seen" verf if not initialized. + * returns result of comparison between @hdr->verf and the "seen" + * verf of the server used by @hdr (DS or MDS) + */ +static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + if (verfp->committed < 0) { + nfs_direct_set_hdr_verf(dreq, hdr); + return 0; + } + return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +} + +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) +/* + * nfs_direct_cmp_commit_data_verf - compare verifier for commit data + * @dreq - direct request possibly spanning multiple servers + * @data - commit data to validate against previously seen verf + * + * returns result of comparison between @data->verf and the verf of + * the server used by @data (DS or MDS) + */ +static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, + struct nfs_commit_data *data) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, data->ds_clp, + data->ds_commit_index); + WARN_ON_ONCE(verfp->committed < 0); + return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); +} +#endif + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -121,22 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. */ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { #ifndef CONFIG_NFS_SWAP - dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, - (long long) pos, nr_segs); + dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", + iocb->ki_filp, (long long) pos, iter->nr_segs); return -EINVAL; #else - VM_BUG_ON(iocb->ki_left != PAGE_SIZE); VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ || rw == KERNEL_READ) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, + return nfs_file_direct_read(iocb, iter, pos, rw == READ ? true : false); - return nfs_file_direct_write(iocb, iov, nr_segs, pos, + return nfs_file_direct_write(iocb, iter, pos, rw == WRITE ? true : false); #endif /* CONFIG_NFS_SWAP */ } @@ -170,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_get(&dreq->kref); init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->mds_cinfo.list); + dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); @@ -224,14 +314,31 @@ out: * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust * the iocb is still valid here if this is a synchronous request. */ -static void nfs_direct_complete(struct nfs_direct_req *dreq) +static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) { + struct inode *inode = dreq->inode; + + if (dreq->iocb && write) { + loff_t pos = dreq->iocb->ki_pos + dreq->count; + + spin_lock(&inode->i_lock); + if (i_size_read(inode) < pos) + i_size_write(inode, pos); + spin_unlock(&inode->i_lock); + } + + if (write) + nfs_zap_mapping(inode, inode->i_mapping); + + inode_dio_done(inode); + if (dreq->iocb) { long res = (long) dreq->error; if (!res) res = (long) dreq->count; aio_complete(dreq->iocb, res, 0); } + complete_all(&dreq->completion); nfs_direct_req_release(dreq); @@ -239,9 +346,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) static void nfs_direct_readpage_release(struct nfs_page *req) { - dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", + dprintk("NFS: direct read done (%s/%llu %d@%lld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_release_request(req); @@ -274,7 +381,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) } out_put: if (put_dreq(dreq)) - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, false); hdr->release(hdr); } @@ -307,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); +static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + struct iov_iter *iter, + loff_t pos) +{ + struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; + ssize_t result = -EINVAL; + size_t requested_bytes = 0; + size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), - GFP_KERNEL); - if (!pagevec) - break; - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 1, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 1, pagevec); - if (WARN_ON(result != 1)) - break; - } + nfs_pageio_init_read(&desc, dreq->inode, false, + &nfs_direct_read_completion_ops); + get_dreq(dreq); + desc.pg_dreq = dreq; + atomic_inc(&inode->i_dio_count); - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + result = iov_iter_get_pages_alloc(iter, &pagevec, + rsize, &pgbase); + if (result < 0) + break; + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -374,54 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de } req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; nfs_release_request(req); break; } pgbase = 0; bytes -= req_len; - started += req_len; - user_addr += req_len; + requested_bytes += req_len; pos += req_len; - count -= req_len; dreq->bytes_left -= req_len; } - /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - -static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos, bool uio) -{ - struct nfs_pageio_descriptor desc; - ssize_t result = -EINVAL; - size_t requested_bytes = 0; - unsigned long seg; - - NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, - &nfs_direct_read_completion_ops); - get_dreq(dreq); - desc.pg_dreq = dreq; - - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); + kvfree(pagevec); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) - break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -431,29 +481,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { + inode_dio_done(inode); nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; } if (put_dreq(dreq)) - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, false); return 0; } -static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @iter: vector of user buffers into which to read data + * @pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file. For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change. Our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. + */ +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { - ssize_t result = -ENOMEM; - struct inode *inode = iocb->ki_filp->f_mapping->host; + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; + ssize_t result = -EINVAL; + size_t count = iov_iter_count(iter); + nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); + + dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); + + result = 0; + if (!count) + goto out; + + mutex_lock(&inode->i_mutex); + result = nfs_sync_mapping(mapping); + if (result) + goto out_unlock; + task_io_account_read(count); + + result = -ENOMEM; dreq = nfs_direct_req_alloc(); if (dreq == NULL) - goto out; + goto out_unlock; dreq->inode = inode; - dreq->bytes_left = iov_length(iov, nr_segs); + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -464,22 +554,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - NFS_I(inode)->read_io += iov_length(iov, nr_segs); - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); - if (!result) + NFS_I(inode)->read_io += count; + result = nfs_direct_read_schedule_iovec(dreq, iter, pos); + + mutex_unlock(&inode->i_mutex); + + if (!result) { result = nfs_direct_wait(dreq); + if (result > 0) + iocb->ki_pos = pos + result; + } + + nfs_direct_req_release(dreq); + return result; + out_release: nfs_direct_req_release(dreq); +out_unlock: + mutex_unlock(&inode->i_mutex); out: return result; } -static void nfs_inode_dio_write_done(struct inode *inode) -{ - nfs_zap_mapping(inode, inode->i_mapping); - inode_dio_done(inode); -} - #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { @@ -498,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -537,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) dprintk("NFS: %5u commit failed with error %d.\n", data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } @@ -595,8 +691,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) nfs_direct_write_reschedule(dreq); break; default: - nfs_inode_dio_write_done(dreq->inode); - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, true); } } @@ -612,114 +707,10 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_inode_dio_write_done(inode); - nfs_direct_complete(dreq); + nfs_direct_complete(dreq, true); } #endif -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ -/* - * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE - * operation. If nfs_writedata_alloc() or get_user_pages() fails, - * bail and stop sending more writes. Write length accounting is - * handled automatically by nfs_direct_write_result(). Otherwise, if - * no requests have been sent, just return an error. - */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); - - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); - if (!pagevec) - break; - - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 0, pagevec); - if (WARN_ON(result != 1)) - break; - } - - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } - - for (i = 0; i < npages; i++) { - struct nfs_page *req; - unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], - pgbase, req_len); - if (IS_ERR(req)) { - result = PTR_ERR(req); - break; - } - nfs_lock_request(req); - req->wb_index = pos >> PAGE_SHIFT; - req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; - nfs_unlock_and_release_request(req); - break; - } - pgbase = 0; - bytes -= req_len; - started += req_len; - user_addr += req_len; - pos += req_len; - count -= req_len; - dreq->bytes_left -= req_len; - } - /* The nfs_page now hold references to these pages */ - nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; @@ -749,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, hdr->verf, - sizeof(dreq->verf)); + nfs_direct_set_hdr_verf(dreq, hdr); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + dreq->flags = + NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else bit = NFS_IOHDR_NEED_COMMIT; @@ -765,6 +756,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); switch (bit) { @@ -799,33 +791,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { .completion = nfs_direct_write_completion, }; + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +/* + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. + */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos, bool uio) + struct iov_iter *iter, + loff_t pos) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; ssize_t result = 0; size_t requested_bytes = 0; - unsigned long seg; + size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, + nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); atomic_inc(&inode->i_dio_count); - NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); + NFS_I(inode)->write_io += iov_iter_count(iter); + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + + result = iov_iter_get_pages_alloc(iter, &pagevec, + wsize, &pgbase); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); + + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, + pgbase, req_len); + if (IS_ERR(req)) { + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; + nfs_unlock_and_release_request(req); + break; + } + pgbase = 0; + bytes -= req_len; + requested_bytes += req_len; + pos += req_len; + dreq->bytes_left -= req_len; + } + nfs_direct_release_pages(pagevec, npages); + kvfree(pagevec); + if (result < 0) break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -844,100 +880,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, return 0; } -static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, - size_t count, bool uio) -{ - ssize_t result = -ENOMEM; - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct nfs_direct_req *dreq; - struct nfs_lock_context *l_ctx; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - goto out; - - dreq->inode = inode; - dreq->bytes_left = count; - dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); - l_ctx = nfs_get_lock_context(dreq->ctx); - if (IS_ERR(l_ctx)) { - result = PTR_ERR(l_ctx); - goto out_release; - } - dreq->l_ctx = l_ctx; - if (!is_sync_kiocb(iocb)) - dreq->iocb = iocb; - - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); - if (!result) - result = nfs_direct_wait(dreq); -out_release: - nfs_direct_req_release(dreq); -out: - return result; -} - -/** - * nfs_file_direct_read - file direct read operation for NFS files - * @iocb: target I/O control block - * @iov: vector of user buffers into which to read data - * @nr_segs: size of iov vector - * @pos: byte offset in file where reading starts - * - * We use this function for direct reads instead of calling - * generic_file_aio_read() in order to avoid gfar's check to see if - * the request starts before the end of the file. For that check - * to work, we must generate a GETATTR before each direct read, and - * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change. Our preference is simply - * to do all reads the application wants, and the server will take - * care of managing the end of file boundary. - * - * This function also eliminates unnecessarily updating the file's - * atime locally, as the NFS server sets the file's atime, and this - * client must read the updated atime from the server back into its - * cache. - */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) -{ - ssize_t retval = -EINVAL; - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - size_t count; - - count = iov_length(iov, nr_segs); - nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - - dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); - - retval = 0; - if (!count) - goto out; - - retval = nfs_sync_mapping(mapping); - if (retval) - goto out; - - task_io_account_read(count); - - retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); - if (retval > 0) - iocb->ki_pos = pos + retval; - -out: - return retval; -} - /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @iov: vector of user buffers from which to write data - * @nr_segs: size of iov vector + * @iter: vector of user buffers from which to write data * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -955,51 +901,97 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { - ssize_t retval = -EINVAL; + ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count; + struct inode *inode = mapping->host; + struct nfs_direct_req *dreq; + struct nfs_lock_context *l_ctx; + loff_t end; + size_t count = iov_iter_count(iter); + end = (pos + count - 1) >> PAGE_CACHE_SHIFT; - count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - count, (long long) pos); + dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", + file, count, (long long) pos); - retval = generic_write_checks(file, &pos, &count, 0); - if (retval) + result = generic_write_checks(file, &pos, &count, 0); + if (result) goto out; - retval = -EINVAL; + result = -EINVAL; if ((ssize_t) count < 0) goto out; - retval = 0; + result = 0; if (!count) goto out; - retval = nfs_sync_mapping(mapping); - if (retval) - goto out; + mutex_lock(&inode->i_mutex); + + result = nfs_sync_mapping(mapping); + if (result) + goto out_unlock; + + if (mapping->nrpages) { + result = invalidate_inode_pages2_range(mapping, + pos >> PAGE_CACHE_SHIFT, end); + if (result) + goto out_unlock; + } task_io_account_write(count); - retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); - if (retval > 0) { - struct inode *inode = mapping->host; + result = -ENOMEM; + dreq = nfs_direct_req_alloc(); + if (!dreq) + goto out_unlock; - iocb->ki_pos = pos + retval; - spin_lock(&inode->i_lock); - if (i_size_read(inode) < iocb->ki_pos) - i_size_write(inode, iocb->ki_pos); - spin_unlock(&inode->i_lock); + dreq->inode = inode; + dreq->bytes_left = count; + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + l_ctx = nfs_get_lock_context(dreq->ctx); + if (IS_ERR(l_ctx)) { + result = PTR_ERR(l_ctx); + goto out_release; + } + dreq->l_ctx = l_ctx; + if (!is_sync_kiocb(iocb)) + dreq->iocb = iocb; + + result = nfs_direct_write_schedule_iovec(dreq, iter, pos); + + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_CACHE_SHIFT, end); } + + mutex_unlock(&inode->i_mutex); + + if (!result) { + result = nfs_direct_wait(dreq); + if (result > 0) { + struct inode *inode = mapping->host; + + iocb->ki_pos = pos + result; + spin_lock(&inode->i_lock); + if (i_size_read(inode) < iocb->ki_pos) + i_size_write(inode, iocb->ki_pos); + spin_unlock(&inode->i_lock); + } + } + nfs_direct_req_release(dreq); + return result; + +out_release: + nfs_direct_req_release(dreq); +out_unlock: + mutex_unlock(&inode->i_mutex); out: - return retval; + return result; } /** diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index ca4b11ec87a..d25f10fb492 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -28,7 +29,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, kfree(ip_addr); return ret; } -EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #else @@ -42,10 +42,13 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #include <linux/seq_file.h> #include <linux/inet.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" @@ -142,7 +145,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } @@ -349,64 +352,65 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, ret = -ESRCH; return ret; } -EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); + +static struct cache_detail nfs_dns_resolve_template = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + int nfs_dns_resolver_cache_init(struct net *net) { - int err = -ENOMEM; + int err; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); + nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net); + if (IS_ERR(nn->nfs_dns_resolve)) + return PTR_ERR(nn->nfs_dns_resolve); + + err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; - nn->nfs_dns_resolve = cd; return 0; err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: + cache_destroy_net(nn->nfs_dns_resolve, net); return err; } void nfs_dns_resolver_cache_destroy(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); + nfs_cache_unregister_net(net, nn->nfs_dns_resolve); + cache_destroy_net(nn->nfs_dns_resolve, net); } +static int nfs4_dns_net_init(struct net *net) +{ + return nfs_dns_resolver_cache_init(net); +} + +static void nfs4_dns_net_exit(struct net *net) +{ + nfs_dns_resolver_cache_destroy(net); +} + +static struct pernet_operations nfs4_dns_resolver_ops = { + .init = nfs4_dns_net_init, + .exit = nfs4_dns_net_exit, +}; + static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -443,11 +447,24 @@ static struct notifier_block nfs_dns_resolver_block = { int nfs_dns_resolver_init(void) { - return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); + int err; + + err = register_pernet_subsys(&nfs4_dns_resolver_ops); + if (err < 0) + goto out; + err = rpc_pipefs_notifier_register(&nfs_dns_resolver_block); + if (err < 0) + goto out1; + return 0; +out1: + unregister_pernet_subsys(&nfs4_dns_resolver_ops); +out: + return err; } void nfs_dns_resolver_destroy(void) { rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); + unregister_pernet_subsys(&nfs4_dns_resolver_ops); } #endif diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3c2b893665b..4042ff58fe3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -37,6 +37,8 @@ #include "iostat.h" #include "fscache.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_FILE static const struct vm_operations_struct nfs_file_vm_ops; @@ -63,9 +65,7 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dprintk("NFS: open file(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: open file(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); @@ -79,9 +79,7 @@ nfs_file_open(struct inode *inode, struct file *filp) int nfs_file_release(struct inode *inode, struct file *filp) { - dprintk("NFS: release(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); @@ -121,10 +119,8 @@ force_reval: loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { - dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - offset, whence); + dprintk("NFS: llseek file(%pD2, %lld, %d)\n", + filp, offset, whence); /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate @@ -148,12 +144,9 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek); int nfs_file_flush(struct file *file, fl_owner_t id) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); - dprintk("NFS: flush(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: flush(%pD2)\n", file); nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) @@ -172,23 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id) EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t -nfs_file_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +nfs_file_read(struct kiocb *iocb, struct iov_iter *to) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); + return nfs_file_direct_read(iocb, to, iocb->ki_pos, true); - dprintk("NFS: read(%s/%s, %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); + dprintk("NFS: read(%pD2, %zu@%lu)\n", + iocb->ki_filp, + iov_iter_count(to), (unsigned long) iocb->ki_pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { - result = generic_file_aio_read(iocb, iov, nr_segs, pos); + result = generic_file_read_iter(iocb, to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } @@ -201,13 +192,11 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); ssize_t res; - dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", + filp, (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { @@ -222,12 +211,10 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int status; - dprintk("NFS: mmap(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("NFS: mmap(%pD2)\n", file); /* Note: generic_file_mmap() returns ENOSYS on nommu systems * so we call that before revalidating the mapping @@ -256,15 +243,12 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { - struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); int have_error, do_resend, status; int ret = 0; - dprintk("NFS: fsync file(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); @@ -292,7 +276,9 @@ static int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); + + trace_nfs_fsync_enter(inode); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -310,6 +296,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) end = LLONG_MAX; } while (ret == -EAGAIN); + trace_nfs_fsync_exit(inode, ret); return ret; } @@ -366,10 +353,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; int once_thru = 0; - dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); start: /* @@ -406,12 +391,11 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + struct nfs_open_context *ctx = nfs_file_open_context(file); int status; - dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); /* * Zero any uninitialised parts of the page, and then mark the page @@ -441,6 +425,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, if (status < 0) return status; NFS_I(mapping->host)->write_io += copied; + + if (nfs_ctx_key_to_expire(ctx)) { + status = nfs_wb_all(mapping->host); + if (status < 0) + return status; + } + return copied; } @@ -451,11 +442,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, * - Called if either PG_private or PG_fscache is set on the page * - Caller holds page lock */ -static void nfs_invalidate_page(struct page *page, unsigned long offset) +static void nfs_invalidate_page(struct page *page, unsigned int offset, + unsigned int length) { - dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); + dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n", + page, offset, length); - if (offset != 0) + if (offset != 0 || length < PAGE_CACHE_SIZE) return; /* Cancel any unstarted writes on this page */ nfs_wb_page_cancel(page_file_mapping(page)->host, page); @@ -493,6 +486,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp) return nfs_fscache_release_page(page, gfp); } +static void nfs_check_dirty_writeback(struct page *page, + bool *dirty, bool *writeback) +{ + struct nfs_inode *nfsi; + struct address_space *mapping = page_file_mapping(page); + + if (!mapping || PageSwapCache(page)) + return; + + /* + * Check if an unstable page is currently being committed and + * if so, have the VM treat it as if the page is under writeback + * so it will not block due to pages that will shortly be freeable. + */ + nfsi = NFS_I(mapping->host); + if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + *writeback = true; + return; + } + + /* + * If PagePrivate() is set, then the page is not freeable and as the + * inode is not being committed, it's not going to be cleaned in the + * near future so treat it as dirty + */ + if (PagePrivate(page)) + *dirty = true; +} + /* * Attempt to clear the private state associated with a page when an error * occurs that requires the cached contents of an inode to be written back or @@ -540,6 +562,7 @@ const struct address_space_operations nfs_file_aops = { .direct_IO = nfs_direct_IO, .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, + .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, #ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, @@ -556,22 +579,21 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct file *filp = vma->vm_file; - struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = file_inode(filp); unsigned pagelen; int ret = VM_FAULT_NOPAGE; struct address_space *mapping; - dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - filp->f_mapping->host->i_ino, + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", + filp, filp->f_mapping->host->i_ino, (long long)page_offset(page)); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + nfs_fscache_wait_on_page_write(NFS_I(inode), page); lock_page(page); mapping = page_file_mapping(page); - if (mapping != dentry->d_inode->i_mapping) + if (mapping != inode->i_mapping) goto out_unlock; wait_on_page_writeback(page); @@ -594,6 +616,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; @@ -605,26 +628,30 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) return 1; ctx = nfs_file_open_context(filp); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) || + nfs_ctx_key_to_expire(ctx)) return 1; return 0; } -ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; - if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); + result = nfs_key_timeout_notify(file, inode); + if (result) + return result; + + if (file->f_flags & O_DIRECT) + return nfs_file_direct_write(iocb, from, pos, true); - dprintk("NFS: write(%s/%s, %lu@%Ld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %zu@%Ld)\n", + file, count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -632,8 +659,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, /* * O_APPEND implies that we must revalidate the file length. */ - if (iocb->ki_filp->f_flags & O_APPEND) { - result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (file->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, file); if (result) goto out; } @@ -642,13 +669,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; - result = generic_file_aio_write(iocb, iov, nr_segs, pos); + result = generic_file_write_iter(iocb, from); if (result > 0) written = result; /* Return error values for O_DSYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = vfs_fsync(iocb->ki_filp, 0); + if (result >= 0 && nfs_need_sync_write(file, inode)) { + int err = vfs_fsync(file, 0); if (err < 0) result = err; } @@ -663,38 +690,6 @@ out_swapfile: } EXPORT_SYMBOL_GPL(nfs_file_write); -ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) -{ - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - unsigned long written = 0; - ssize_t ret; - - dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); - - /* - * The combination of splice and an O_APPEND destination is disallowed. - */ - - ret = generic_file_splice_write(pipe, filp, ppos, count, flags); - if (ret > 0) - written = ret; - - if (ret >= 0 && nfs_need_sync_write(filp, inode)) { - int err = vfs_fsync(filp, 0); - if (err < 0) - ret = err; - } - if (ret > 0) - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); - return ret; -} -EXPORT_SYMBOL_GPL(nfs_file_splice_write); - static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -744,6 +739,7 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; + struct nfs_lock_context *l_ctx; int status; /* @@ -752,6 +748,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); + l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); + if (!IS_ERR(l_ctx)) { + status = nfs_iocounter_wait(&l_ctx->io_count); + nfs_put_lock_context(l_ctx); + if (status < 0) + return status; + } + /* NOTE: special case * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. @@ -824,10 +828,8 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int ret = -ENOLCK; int is_local = 0; - dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags, + dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", + filp, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); @@ -864,10 +866,8 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_mapping->host; int is_local = 0; - dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags); + dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", + filp, fl->fl_type, fl->fl_flags); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -885,10 +885,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) is_local = 1; /* We're simulating flock() locks using posix locks on the server */ - fl->fl_owner = (fl_owner_t)filp; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); @@ -901,19 +897,17 @@ EXPORT_SYMBOL_GPL(nfs_flock); */ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - dprintk("NFS: setlease(%s/%s, arg=%ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, arg); + dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); return -EINVAL; } EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = nfs_file_read, + .write_iter = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -922,7 +916,7 @@ const struct file_operations nfs_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, + .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile new file mode 100644 index 00000000000..8516cdffb9e --- /dev/null +++ b/fs/nfs/filelayout/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the pNFS Files Layout Driver kernel module +# +obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o +nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c index 194c4841033..d2eba1c13b7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -35,10 +35,11 @@ #include <linux/sunrpc/metrics.h> -#include "nfs4session.h" -#include "internal.h" -#include "delegation.h" -#include "nfs4filelayout.h" +#include "../nfs4session.h" +#include "../internal.h" +#include "../delegation.h" +#include "filelayout.h" +#include "../nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -83,43 +84,45 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_write_data *data) +static void filelayout_reset_write(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " - "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, data->task.tk_pid, hdr->inode->i_sb->s_id, - (long long)NFS_FILEID(hdr->inode), + (unsigned long long)NFS_FILEID(hdr->inode), data->args.count, (unsigned long long)data->args.offset); task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops); + hdr->completion_ops, + hdr->dreq); } } -static void filelayout_reset_read(struct nfs_read_data *data) +static void filelayout_reset_read(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " - "(req %s/%lld, %u bytes @ offset %llu)\n", __func__, + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, data->task.tk_pid, hdr->inode->i_sb->s_id, - (long long)NFS_FILEID(hdr->inode), + (unsigned long long)NFS_FILEID(hdr->inode), data->args.count, (unsigned long long)data->args.offset); task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops); + hdr->completion_ops, + hdr->dreq); } } @@ -127,7 +130,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return; - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); pnfs_return_layout(inode); } @@ -157,11 +159,14 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(mds_server, state); + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(mds_server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + } nfs4_schedule_lease_recovery(mds_client); goto wait_on_recovery; /* DS session errors */ @@ -225,6 +230,9 @@ reset: out: task->tk_status = 0; return -EAGAIN; +out_bad_stateid: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) @@ -235,11 +243,12 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; + trace_nfs4_pnfs_read(data, task->tk_status); err = filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, hdr->lseg); @@ -261,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. */ static void -filelayout_set_layoutcommit(struct nfs_write_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; @@ -270,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) return; pnfs_set_layoutcommit(wdata); - dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -296,31 +305,41 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(rdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_read(rdata); rpc_exit(task, 0); return; } - rdata->read_done_cb = filelayout_read_done_cb; + rdata->pgio_done_cb = filelayout_read_done_cb; - nfs41_setup_sequence(rdata->ds_clp->cl_session, + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - task); + task)) + return; + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && - task->tk_status == 0) + task->tk_status == 0) { + nfs41_sequence_done(task, &rdata->res.seq_res); return; + } /* Note this may cause RPC to be resent */ rdata->header->mds_ops->rpc_call_done(task, data); @@ -328,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -344,11 +363,12 @@ static void filelayout_read_release(void *data) } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; + trace_nfs4_pnfs_write(data, task->tk_status); err = filelayout_async_handle_error(task, data->args.context->state, data->ds_clp, hdr->lseg); @@ -381,6 +401,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, { int err; + trace_nfs4_pnfs_commit_ds(data, task->tk_status); err = filelayout_async_handle_error(task, NULL, data->ds_clp, data->lseg); @@ -398,27 +419,37 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } if (filelayout_reset_to_mds(wdata->header->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); filelayout_reset_write(wdata); rpc_exit(task, 0); return; } - nfs41_setup_sequence(wdata->ds_clp->cl_session, + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && - task->tk_status == 0) + task->tk_status == 0) { + nfs41_sequence_done(task, &wdata->res.seq_res); return; + } /* Note this may cause RPC to be resent */ wdata->header->mds_ops->rpc_call_done(task, data); @@ -426,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -498,11 +529,12 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_read_data *data) +filelayout_read_pagelist(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -517,12 +549,18 @@ filelayout_read_pagelist(struct nfs_read_data *data) ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + dprintk("%s USE DS: %s cl_count %d\n", __func__, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; @@ -531,18 +569,19 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, - &filelayout_read_call_ops, RPC_TASK_SOFTCONN); + nfs_initiate_pgio(ds_clnt, data, + &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_write_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; loff_t offset = data->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -553,24 +592,27 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->write_done_cb = filelayout_write_done_cb; + data->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; - /* - * Get the file offset on the dserver. Set the write offset to - * this offset and save the original offset. - */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, + nfs_initiate_pgio(ds_clnt, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; @@ -594,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_deviceid_node *d; struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; - struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); dprintk("--> %s\n", __func__); @@ -612,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) { + if (!fl->stripe_unit) { dprintk("%s Invalid stripe unit (%u)\n", __func__, fl->stripe_unit); goto out; @@ -622,7 +663,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, NFS_SERVER(lo->plh_inode)->nfs_client, id); if (d == NULL) { - dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags); + dsaddr = filelayout_get_device_info(lo->plh_inode, id, + lo->plh_lc_cred, gfp_flags); if (dsaddr == NULL) goto out; } else @@ -648,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out_put; } - if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { - dprintk("%s Stripe unit (%u) not aligned with rsize %u " - "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, - nfss->wsize); - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); @@ -806,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); struct pnfs_commit_bucket *buckets; - int size; + int size, i; if (fl->commit_through_mds) return 0; - if (cinfo->ds->nbuckets != 0) { + + size = (fl->stripe_type == STRIPE_SPARSE) ? + fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + if (cinfo->ds->nbuckets >= size) { /* This assumes there is only one IOMODE_RW lseg. What * we really want to do is have a layout_hdr level * dictionary of <multipath_list4, fh> keys, each @@ -820,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, return 0; } - size = (fl->stripe_type == STRIPE_SPARSE) ? - fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), gfp_flags); if (!buckets) return -ENOMEM; - else { - int i; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + /* mark direct verifier as unset */ + buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; + } - spin_lock(cinfo->lock); - if (cinfo->ds->nbuckets != 0) - kfree(buckets); - else { - cinfo->ds->buckets = buckets; - cinfo->ds->nbuckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&buckets[i].written); - INIT_LIST_HEAD(&buckets[i].committing); - } - } - spin_unlock(cinfo->lock); - return 0; + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets >= size) + goto out; + for (i = 0; i < cinfo->ds->nbuckets; i++) { + list_splice(&cinfo->ds->buckets[i].written, + &buckets[i].written); + list_splice(&cinfo->ds->buckets[i].committing, + &buckets[i].committing); + buckets[i].direct_verf.committed = + cinfo->ds->buckets[i].direct_verf.committed; + buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; + buckets[i].clseg = cinfo->ds->buckets[i].clseg; } + swap(cinfo->ds->buckets, buckets); + cinfo->ds->nbuckets = size; +out: + spin_unlock(cinfo->lock); + kfree(buckets); + return 0; } static struct pnfs_layout_segment * @@ -871,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return true : coalesce page - * return false : don't coalesce page + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. */ -static bool +static size_t filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { + unsigned int size; u64 p_stripe, r_stripe; - u32 stripe_unit; + u32 stripe_offset; + u64 segment_offset = pgio->pg_lseg->pls_range.offset; + u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; - if (!pnfs_generic_pg_test(pgio, prev, req) || - !nfs_generic_pg_test(pgio, prev, req)) - return false; + /* calls nfs_generic_pg_test */ + size = pnfs_generic_pg_test(pgio, prev, req); + if (!size) + return 0; - p_stripe = (u64)req_offset(prev); - r_stripe = (u64)req_offset(req); - stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; + /* see if req and prev are in the same stripe */ + if (prev) { + p_stripe = (u64)req_offset(prev) - segment_offset; + r_stripe = (u64)req_offset(req) - segment_offset; + do_div(p_stripe, stripe_unit); + do_div(r_stripe, stripe_unit); - do_div(p_stripe, stripe_unit); - do_div(r_stripe, stripe_unit); + if (p_stripe != r_stripe) + return 0; + } - return (p_stripe == r_stripe); + /* calculate remaining bytes in the current stripe */ + div_u64_rem((u64)req_offset(req) - segment_offset, + stripe_unit, + &stripe_offset); + WARN_ON_ONCE(stripe_offset > stripe_unit); + if (stripe_offset >= stripe_unit) + return 0; + return min(stripe_unit - (unsigned int)stripe_offset, size); } static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (req->wb_offset != req->wb_pgbase) { - /* - * Handling unaligned pages is difficult, because have to - * somehow split a req in two in certain cases in the - * pg.test code. Avoid this by just not using pnfs - * in this case. - */ - nfs_pageio_reset_read_mds(pgio); - return; - } - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, @@ -929,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (req->wb_offset != req->wb_pgbase) - goto out_mds; - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, @@ -1023,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req, */ j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); + spin_lock(cinfo->lock); buckets = cinfo->ds->buckets; list = &buckets[i].written; if (list_empty(list)) { @@ -1036,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req, } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; + spin_unlock(cinfo->lock); return list; } @@ -1079,16 +1128,19 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) { struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; u32 idx; struct nfs_fh *fh; idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) { - prepare_to_resend_writes(data); - filelayout_commit_release(data); - return -EAGAIN; - } + if (!ds) + goto out_err; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode); + if (IS_ERR(ds_clnt)) + goto out_err; + dprintk("%s ino %lu, how %d cl_count %d\n", __func__, data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; @@ -1097,9 +1149,13 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, + return nfs_initiate_commit(ds_clnt, data, &filelayout_commit_call_ops, how, RPC_TASK_SOFTCONN); +out_err: + prepare_to_resend_writes(data); + filelayout_commit_release(data); + return -EAGAIN; } static int @@ -1125,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst, return ret; } +/* Note called with cinfo->lock held. */ static int filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo, @@ -1169,19 +1226,22 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *b; + struct pnfs_layout_segment *freeme; int i; - /* NOTE cinfo->lock is NOT held, relying on fact that this is - * only called on single thread per dreq. - * Can't take the lock because need to do pnfs_put_lseg - */ +restart: + spin_lock(cinfo->lock); for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - pnfs_put_lseg(b->wlseg); + freeme = b->wlseg; b->wlseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + goto restart; } } cinfo->ds->nwritten = 0; + spin_unlock(cinfo->lock); } static unsigned int @@ -1192,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; + struct pnfs_layout_segment *freeme; fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; @@ -1202,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (!data) break; data->ds_commit_index = i; + spin_lock(cinfo->lock); data->lseg = bucket->clseg; bucket->clseg = NULL; + spin_unlock(cinfo->lock); list_add(&data->pages, list); nreq++; } @@ -1213,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (list_empty(&bucket->committing)) continue; nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - pnfs_put_lseg(bucket->clseg); + spin_lock(cinfo->lock); + freeme = bucket->clseg; bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); } /* Caller will clean up entries put on list */ return nreq; @@ -1279,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct nfs4_filelayout *flo; flo = kzalloc(sizeof(*flo), gfp_flags); - return &flo->generic_hdr; + return flo != NULL ? &flo->generic_hdr : NULL; } static void diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h index 8c07241fe52..ffbddf2219e 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -30,13 +30,13 @@ #ifndef FS_NFS_NFS4FILELAYOUT_H #define FS_NFS_NFS4FILELAYOUT_H -#include "pnfs.h" +#include "../pnfs.h" /* * Default data server connection timeout and retrans vaules. * Set by module paramters dataserver_timeo and dataserver_retrans. */ -#define NFS4_DEF_DS_TIMEO 60 +#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ #define NFS4_DEF_DS_RETRANS 5 /* @@ -70,6 +70,8 @@ struct nfs4_pnfs_ds { struct list_head ds_addrs; struct nfs_client *ds_clp; atomic_t ds_count; + unsigned long ds_state; +#define NFS4DS_CONNECTING 0 /* ds is establishing connection */ }; struct nfs4_file_layout_dsaddr { @@ -148,6 +150,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, + struct rpc_cred *cred, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index b720064bcd7..44bf0140a4c 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -31,10 +31,11 @@ #include <linux/nfs_fs.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/sunrpc/addr.h> -#include "internal.h" -#include "nfs4session.h" -#include "nfs4filelayout.h" +#include "../internal.h" +#include "../nfs4session.h" +#include "filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -94,7 +95,7 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) b6 = (struct sockaddr_in6 *)addr2; /* LINKLOCAL addresses must have matching scope_id */ - if (ipv6_addr_scope(&a6->sin6_addr) == + if (ipv6_addr_src_scope(&a6->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL && a6->sin6_scope_id != b6->sin6_scope_id) return false; @@ -184,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) if (status) goto out_put; + smp_wmb(); ds->ds_clp = clp; dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: @@ -667,7 +669,10 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl * of available devices, and return it. */ struct nfs4_file_layout_dsaddr * -filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) +filelayout_get_device_info(struct inode *inode, + struct nfs4_deviceid *dev_id, + struct rpc_cred *cred, + gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -707,8 +712,9 @@ filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gf pdev->pgbase = 0; pdev->pglen = max_resp_sz; pdev->mincount = 0; + pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; - rc = nfs4_proc_getdeviceinfo(server, pdev); + rc = nfs4_proc_getdeviceinfo(server, pdev, cred); dprintk("%s getdevice info returns %d\n", __func__, rc); if (rc) goto out_free; @@ -774,34 +780,57 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) return flseg->fh_array[i]; } +static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) +{ + might_sleep(); + wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, + nfs_wait_bit_killable, TASK_KILLABLE); +} + +static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) +{ + smp_mb__before_atomic(); + clear_bit(NFS4DS_CONNECTING, &ds->ds_state); + smp_mb__after_atomic(); + wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); +} + + struct nfs4_pnfs_ds * nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) { struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - - if (filelayout_test_devid_unavailable(devid)) - return NULL; + struct nfs4_pnfs_ds *ret = ds; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); filelayout_mark_devid_invalid(devid); - return NULL; + goto out; } + smp_rmb(); + if (ds->ds_clp) + goto out_test_devid; - if (!ds->ds_clp) { + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; err = nfs4_ds_connect(s, ds); - if (err) { + if (err) nfs4_mark_deviceid_unavailable(devid); - return NULL; - } + nfs4_clear_ds_conn_bit(ds); + } else { + /* Either ds is connected, or ds is NULL */ + nfs4_wait_ds_connect(ds); } - return ds; +out_test_devid: + if (filelayout_test_devid_unavailable(devid)) + ret = NULL; +out: + return ret; } module_param(dataserver_retrans, uint, 0644); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 24d1d1c5fca..3ef01f0ba0b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp) /* create a cache index for looking up filehandles */ clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, &nfs_fscache_server_index_def, - clp); + clp, true); dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", clp, clp->fscache); } @@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - nfss); + nfss, true); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", nfss, nfss->fscache); return; @@ -178,163 +178,79 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) /* * Initialise the per-inode cache cookie pointer for an NFS inode. */ -void nfs_fscache_init_inode_cookie(struct inode *inode) +void nfs_fscache_init_inode(struct inode *inode) { - NFS_I(inode)->fscache = NULL; - if (S_ISREG(inode->i_mode)) - set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); -} - -/* - * Get the per-inode cache cookie for an NFS inode. - */ -static void nfs_fscache_enable_inode_cookie(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; struct nfs_inode *nfsi = NFS_I(inode); - if (nfsi->fscache || !NFS_FSCACHE(inode)) + nfsi->fscache = NULL; + if (!S_ISREG(inode->i_mode)) return; - - if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { - nfsi->fscache = fscache_acquire_cookie( - NFS_SB(sb)->fscache, - &nfs_fscache_inode_object_def, - nfsi); - - dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", - sb, nfsi, nfsi->fscache); - } + nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, + &nfs_fscache_inode_object_def, + nfsi, false); } /* * Release a per-inode cookie. */ -void nfs_fscache_release_inode_cookie(struct inode *inode) +void nfs_fscache_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = nfs_i_fscache(inode); - dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", - nfsi, nfsi->fscache); + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); - fscache_relinquish_cookie(nfsi->fscache, 0); + fscache_relinquish_cookie(cookie, false); nfsi->fscache = NULL; } -/* - * Retire a per-inode cookie, destroying the data attached to it. - */ -void nfs_fscache_zap_inode_cookie(struct inode *inode) +static bool nfs_fscache_can_enable(void *data) { - struct nfs_inode *nfsi = NFS_I(inode); + struct inode *inode = data; - dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", - nfsi, nfsi->fscache); - - fscache_relinquish_cookie(nfsi->fscache, 1); - nfsi->fscache = NULL; + return !inode_is_open_for_write(inode); } /* - * Turn off the cache with regard to a per-inode cookie if opened for writing, - * invalidating all the pages in the page cache relating to the associated - * inode to clear the per-page caching. - */ -static void nfs_fscache_disable_inode_cookie(struct inode *inode) -{ - clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); - - if (NFS_I(inode)->fscache) { - dfprintk(FSCACHE, - "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - - /* Need to uncache any pages attached to this inode that - * fscache knows about before turning off the cache. - */ - fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); - nfs_fscache_zap_inode_cookie(inode); - } -} - -/* - * wait_on_bit() sleep function for uninterruptible waiting - */ -static int nfs_fscache_wait_bit(void *flags) -{ - schedule(); - return 0; -} - -/* - * Lock against someone else trying to also acquire or relinquish a cookie - */ -static inline void nfs_fscache_inode_lock(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) - wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, - nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); -} - -/* - * Unlock cookie management lock - */ -static inline void nfs_fscache_inode_unlock(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - smp_mb__before_clear_bit(); - clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); -} - -/* - * Decide if we should enable or disable local caching for this inode. - * - For now, with NFS, only regular files that are open read-only will be able - * to use the cache. - * - May be invoked multiple times in parallel by parallel nfs_open() functions. - */ -void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) -{ - if (NFS_FSCACHE(inode)) { - nfs_fscache_inode_lock(inode); - if ((filp->f_flags & O_ACCMODE) != O_RDONLY) - nfs_fscache_disable_inode_cookie(inode); - else - nfs_fscache_enable_inode_cookie(inode); - nfs_fscache_inode_unlock(inode); - } -} -EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); - -/* - * Replace a per-inode cookie due to revalidation detecting a file having - * changed on the server. + * Enable or disable caching for a file that is being opened as appropriate. + * The cookie is allocated when the inode is initialised, but is not enabled at + * that time. Enablement is deferred to file-open time to avoid stat() and + * access() thrashing the cache. + * + * For now, with NFS, only regular files that are open read-only will be able + * to use the cache. + * + * We enable the cache for an inode if we open it read-only and it isn't + * currently open for writing. We disable the cache if the inode is open + * write-only. + * + * The caller uses the file struct to pin i_writecount on the inode before + * calling us when a file is opened for writing, so we can make use of that. + * + * Note that this may be invoked multiple times in parallel by parallel + * nfs_open() functions. */ -void nfs_fscache_reset_inode_cookie(struct inode *inode) +void nfs_fscache_open_file(struct inode *inode, struct file *filp) { struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_server *nfss = NFS_SERVER(inode); - NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); + struct fscache_cookie *cookie = nfs_i_fscache(inode); - nfs_fscache_inode_lock(inode); - if (nfsi->fscache) { - /* retire the current fscache cache and get a new one */ - fscache_relinquish_cookie(nfsi->fscache, 1); - - nfsi->fscache = fscache_acquire_cookie( - nfss->nfs_client->fscache, - &nfs_fscache_inode_object_def, - nfsi); + if (!fscache_cookie_valid(cookie)) + return; - dfprintk(FSCACHE, - "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", - nfss, nfsi, old, nfsi->fscache); + if (inode_is_open_for_write(inode)) { + dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); + clear_bit(NFS_INO_FSCACHE, &nfsi->flags); + fscache_disable_cookie(cookie, true); + fscache_uncache_all_inode_pages(cookie, inode); + } else { + dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi); + fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode); + if (fscache_cookie_enabled(cookie)) + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); } - nfs_fscache_inode_unlock(inode); } +EXPORT_SYMBOL_GPL(nfs_fscache_open_file); /* * Release the caching state associated with a page, if the page isn't busy @@ -344,12 +260,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) int nfs_fscache_release_page(struct page *page, gfp_t gfp) { if (PageFsCache(page)) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; + struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host); BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", - cookie, page, nfsi); + cookie, page, NFS_I(page->mapping->host)); if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; @@ -367,13 +282,12 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) */ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct fscache_cookie *cookie = nfsi->fscache; + struct fscache_cookie *cookie = nfs_i_fscache(inode); BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", - cookie, page, nfsi); + cookie, page, NFS_I(inode)); fscache_wait_on_page_write(cookie, page); @@ -417,9 +331,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, dfprintk(FSCACHE, "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", - NFS_I(inode)->fscache, page, page->index, page->flags, inode); + nfs_i_fscache(inode), page, page->index, page->flags, inode); - ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + ret = fscache_read_or_alloc_page(nfs_i_fscache(inode), page, nfs_readpage_from_fscache_complete, ctx, @@ -459,9 +373,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, int ret; dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", - NFS_I(inode)->fscache, npages, inode); + nfs_i_fscache(inode), npages, inode); - ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode), mapping, pages, nr_pages, nfs_readpage_from_fscache_complete, ctx, @@ -506,15 +420,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) dfprintk(FSCACHE, "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", - NFS_I(inode)->fscache, page, page->index, page->flags, sync); + nfs_i_fscache(inode), page, page->index, page->flags, sync); - ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); + ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL); dfprintk(FSCACHE, "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", page, page->index, page->flags, ret); if (ret != 0) { - fscache_uncache_page(NFS_I(inode)->fscache, page); + fscache_uncache_page(nfs_i_fscache(inode), page); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 4ecb76652eb..d7fe3e799f2 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -76,11 +76,9 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *); extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int); extern void nfs_fscache_release_super_cookie(struct super_block *); -extern void nfs_fscache_init_inode_cookie(struct inode *); -extern void nfs_fscache_release_inode_cookie(struct inode *); -extern void nfs_fscache_zap_inode_cookie(struct inode *); -extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); -extern void nfs_fscache_reset_inode_cookie(struct inode *); +extern void nfs_fscache_init_inode(struct inode *); +extern void nfs_fscache_clear_inode(struct inode *); +extern void nfs_fscache_open_file(struct inode *, struct file *); extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); extern int nfs_fscache_release_page(struct page *, gfp_t); @@ -187,12 +185,10 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} -static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_set_inode_cookie(struct inode *inode, - struct file *filp) {} -static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_init_inode(struct inode *inode) {} +static inline void nfs_fscache_clear_inode(struct inode *inode) {} +static inline void nfs_fscache_open_file(struct inode *inode, + struct file *filp) {} static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) { diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 033803c3664..b94f80420a5 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -95,7 +95,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, goto out; } - inode = nfs_fhget(sb, mntfh, fsinfo.fattr); + inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); ret = ERR_CAST(inode); @@ -120,14 +120,14 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, security_d_instantiate(ret, inode); spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { + if (IS_ROOT(ret) && !ret->d_fsdata && + !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { ret->d_fsdata = name; name = NULL; } spin_unlock(&ret->d_lock); out: - if (name) - kfree(name); + kfree(name); nfs_free_fattr(fsinfo.fattr); return ret; } diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index bc3968fa81e..567983d2c0e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -49,6 +49,7 @@ #include "internal.h" #include "netns.h" +#include "nfs4trace.h" #define NFS_UINT_MAXLEN 11 @@ -63,6 +64,7 @@ struct idmap_legacy_upcalldata { }; struct idmap { + struct rpc_pipe_dir_object idmap_pdo; struct rpc_pipe *idmap_pipe; struct idmap_legacy_upcalldata *idmap_upcall_data; struct mutex idmap_mutex; @@ -97,7 +99,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr) static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) { struct nfs4_string *owner = fattr->owner_name; - __u32 uid; + kuid_t uid; if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) return false; @@ -111,7 +113,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) { struct nfs4_string *group = fattr->group_name; - __u32 gid; + kgid_t gid; if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) return false; @@ -193,7 +195,8 @@ static int nfs_idmap_init_keyring(void) if (!cred) return -ENOMEM; - keyring = keyring_alloc(".id_resolver", 0, 0, cred, + keyring = keyring_alloc(".id_resolver", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_NOT_IN_QUOTA, NULL); @@ -261,29 +264,42 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, return desclen; } -static ssize_t nfs_idmap_request_key(struct key_type *key_type, - const char *name, size_t namelen, - const char *type, void *data, - size_t data_size, struct idmap *idmap) +static struct key *nfs_idmap_request_key(const char *name, size_t namelen, + const char *type, struct idmap *idmap) { - const struct cred *saved_cred; - struct key *rkey; char *desc; - struct user_key_payload *payload; + struct key *rkey; ssize_t ret; ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); if (ret <= 0) - goto out; + return ERR_PTR(ret); + + rkey = request_key(&key_type_id_resolver, desc, ""); + if (IS_ERR(rkey)) { + mutex_lock(&idmap->idmap_mutex); + rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, + desc, "", 0, idmap); + mutex_unlock(&idmap->idmap_mutex); + } + + kfree(desc); + return rkey; +} + +static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, + const char *type, void *data, + size_t data_size, struct idmap *idmap) +{ + const struct cred *saved_cred; + struct key *rkey; + struct user_key_payload *payload; + ssize_t ret; saved_cred = override_creds(id_resolver_cache); - if (idmap) - rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); - else - rkey = request_key(&key_type_id_resolver, desc, ""); + rkey = nfs_idmap_request_key(name, namelen, type, idmap); revert_creds(saved_cred); - kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); goto out; @@ -296,7 +312,7 @@ static ssize_t nfs_idmap_request_key(struct key_type *key_type, if (ret < 0) goto out_up; - payload = rcu_dereference(rkey->payload.data); + payload = rcu_dereference(rkey->payload.rcudata); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; @@ -315,23 +331,6 @@ out: return ret; } -static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, - const char *type, void *data, - size_t data_size, struct idmap *idmap) -{ - ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, - name, namelen, type, data, - data_size, NULL); - if (ret < 0) { - mutex_lock(&idmap->idmap_mutex); - ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, - name, namelen, type, data, - data_size, idmap); - mutex_unlock(&idmap->idmap_mutex); - } - return ret; -} - /* ID -> Name */ static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen, struct idmap *idmap) @@ -404,16 +403,23 @@ static struct key_type key_type_id_resolver_legacy = { .request_key = nfs_idmap_legacy_upcall, }; -static void __nfs_idmap_unregister(struct rpc_pipe *pipe) +static void nfs_idmap_pipe_destroy(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { - if (pipe->dentry) + struct idmap *idmap = pdo->pdo_data; + struct rpc_pipe *pipe = idmap->idmap_pipe; + + if (pipe->dentry) { rpc_unlink(pipe->dentry); + pipe->dentry = NULL; + } } -static int __nfs_idmap_register(struct dentry *dir, - struct idmap *idmap, - struct rpc_pipe *pipe) +static int nfs_idmap_pipe_create(struct dentry *dir, + struct rpc_pipe_dir_object *pdo) { + struct idmap *idmap = pdo->pdo_data; + struct rpc_pipe *pipe = idmap->idmap_pipe; struct dentry *dentry; dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); @@ -423,36 +429,10 @@ static int __nfs_idmap_register(struct dentry *dir, return 0; } -static void nfs_idmap_unregister(struct nfs_client *clp, - struct rpc_pipe *pipe) -{ - struct net *net = clp->cl_net; - struct super_block *pipefs_sb; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - __nfs_idmap_unregister(pipe); - rpc_put_sb_net(net); - } -} - -static int nfs_idmap_register(struct nfs_client *clp, - struct idmap *idmap, - struct rpc_pipe *pipe) -{ - struct net *net = clp->cl_net; - struct super_block *pipefs_sb; - int err = 0; - - pipefs_sb = rpc_get_sb_net(net); - if (pipefs_sb) { - if (clp->cl_rpcclient->cl_dentry) - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - idmap, pipe); - rpc_put_sb_net(net); - } - return err; -} +static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = { + .create = nfs_idmap_pipe_create, + .destroy = nfs_idmap_pipe_destroy, +}; int nfs_idmap_new(struct nfs_client *clp) @@ -465,23 +445,31 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; + rpc_init_pipe_dir_object(&idmap->idmap_pdo, + &nfs_idmap_pipe_dir_object_ops, + idmap); + pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); if (IS_ERR(pipe)) { error = PTR_ERR(pipe); - kfree(idmap); - return error; - } - error = nfs_idmap_register(clp, idmap, pipe); - if (error) { - rpc_destroy_pipe_data(pipe); - kfree(idmap); - return error; + goto err; } idmap->idmap_pipe = pipe; mutex_init(&idmap->idmap_mutex); + error = rpc_add_pipe_dir_object(clp->cl_net, + &clp->cl_rpcclient->cl_pipedir_objects, + &idmap->idmap_pdo); + if (error) + goto err_destroy_pipe; + clp->cl_idmap = idmap; return 0; +err_destroy_pipe: + rpc_destroy_pipe_data(idmap->idmap_pipe); +err: + kfree(idmap); + return error; } void @@ -491,130 +479,26 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - nfs_idmap_unregister(clp, idmap->idmap_pipe); - rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; + rpc_remove_pipe_dir_object(clp->cl_net, + &clp->cl_rpcclient->cl_pipedir_objects, + &idmap->idmap_pdo); + rpc_destroy_pipe_data(idmap->idmap_pipe); kfree(idmap); } -static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, - struct super_block *sb) -{ - int err = 0; - - switch (event) { - case RPC_PIPEFS_MOUNT: - err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, - clp->cl_idmap, - clp->cl_idmap->idmap_pipe); - break; - case RPC_PIPEFS_UMOUNT: - if (clp->cl_idmap->idmap_pipe) { - struct dentry *parent; - - parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; - __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); - /* - * Note: This is a dirty hack. SUNRPC hook has been - * called already but simple_rmdir() call for the - * directory returned with error because of idmap pipe - * inside. Thus now we have to remove this directory - * here. - */ - if (rpc_rmdir(parent)) - printk(KERN_ERR "NFS: %s: failed to remove " - "clnt dir!\n", __func__); - } - break; - default: - printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, - event); - return -ENOTSUPP; - } - return err; -} - -static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) -{ - struct nfs_net *nn = net_generic(net, nfs_net_id); - struct dentry *cl_dentry; - struct nfs_client *clp; - int err; - -restart: - spin_lock(&nn->nfs_client_lock); - list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - /* Wait for initialisation to finish */ - if (clp->cl_cons_state == NFS_CS_INITING) { - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - err = nfs_wait_client_init_complete(clp); - nfs_put_client(clp); - if (err) - return NULL; - goto restart; - } - /* Skip nfs_clients that failed to initialise */ - if (clp->cl_cons_state < 0) - continue; - smp_rmb(); - if (clp->rpc_ops != &nfs_v4_clientops) - continue; - cl_dentry = clp->cl_idmap->idmap_pipe->dentry; - if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || - ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nn->nfs_client_lock); - return clp; - } - spin_unlock(&nn->nfs_client_lock); - return NULL; -} - -static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, - void *ptr) -{ - struct super_block *sb = ptr; - struct nfs_client *clp; - int error = 0; - - if (!try_module_get(THIS_MODULE)) - return 0; - - while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { - error = __rpc_pipefs_event(clp, event, sb); - nfs_put_client(clp); - if (error) - break; - } - module_put(THIS_MODULE); - return error; -} - -#define PIPEFS_NFS_PRIO 1 - -static struct notifier_block nfs_idmap_block = { - .notifier_call = rpc_pipefs_event, - .priority = SUNRPC_PIPEFS_NFS_PRIO, -}; - int nfs_idmap_init(void) { int ret; ret = nfs_idmap_init_keyring(); if (ret != 0) goto out; - ret = rpc_pipefs_notifier_register(&nfs_idmap_block); - if (ret != 0) - nfs_idmap_quit_keyring(); out: return ret; } void nfs_idmap_quit(void) { - rpc_pipefs_notifier_unregister(&nfs_idmap_block); nfs_idmap_quit_keyring(); } @@ -725,9 +609,9 @@ out1: return ret; } -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) { - return key_instantiate_and_link(key, data, strlen(data) + 1, + return key_instantiate_and_link(key, data, datalen, id_resolver_cache->thread_keyring, authkey); } @@ -737,6 +621,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; + size_t len; int ret = -ENOKEY; /* ret = -ENOKEY */ @@ -746,13 +631,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, case IDMAP_CONV_NAMETOID: if (strcmp(upcall->im_name, im->im_name) != 0) break; - sprintf(id_str, "%d", im->im_id); - ret = nfs_idmap_instantiate(key, authkey, id_str); + /* Note: here we store the NUL terminator too */ + len = sprintf(id_str, "%d", im->im_id) + 1; + ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: if (upcall->im_id != im->im_id) break; - ret = nfs_idmap_instantiate(key, authkey, im->im_name); + len = strlen(im->im_name); + ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); break; default: ret = -EINVAL; @@ -764,7 +651,7 @@ out: static ssize_t idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); + struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; struct key_construction *cons; struct idmap_msg im; @@ -836,43 +723,65 @@ idmap_release_pipe(struct inode *inode) nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); } -int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid) { struct idmap *idmap = server->nfs_client->cl_idmap; + __u32 id = -1; + int ret = 0; - if (nfs_map_string_to_numeric(name, namelen, uid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); + if (!nfs_map_string_to_numeric(name, namelen, &id)) + ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap); + if (ret == 0) { + *uid = make_kuid(&init_user_ns, id); + if (!uid_valid(*uid)) + ret = -ERANGE; + } + trace_nfs4_map_name_to_uid(name, namelen, id, ret); + return ret; } -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid) { struct idmap *idmap = server->nfs_client->cl_idmap; + __u32 id = -1; + int ret = 0; - if (nfs_map_string_to_numeric(name, namelen, gid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); + if (!nfs_map_string_to_numeric(name, namelen, &id)) + ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap); + if (ret == 0) { + *gid = make_kgid(&init_user_ns, id); + if (!gid_valid(*gid)) + ret = -ERANGE; + } + trace_nfs4_map_group_to_gid(name, namelen, id, ret); + return ret; } -int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) +int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; + __u32 id; + id = from_kuid(&init_user_ns, uid); if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); + ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap); if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); + ret = nfs_map_numeric_to_string(id, buf, buflen); + trace_nfs4_map_uid_to_name(buf, ret, id, ret); return ret; } -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; + __u32 id; + id = from_kgid(&init_user_ns, gid); if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); + ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap); if (ret < 0) - ret = nfs_map_numeric_to_string(gid, buf, buflen); + ret = nfs_map_numeric_to_string(id, buf, buflen); + trace_nfs4_map_gid_to_group(buf, ret, id, ret); return ret; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ebeb94ce1b0..9927913c97c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,7 +38,6 @@ #include <linux/slab.h> #include <linux/compat.h> #include <linux/freezer.h> -#include <linux/crc32.h> #include <asm/uaccess.h> @@ -48,11 +47,12 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" -#include "dns_resolve.h" #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 @@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); @@ -122,13 +122,13 @@ void nfs_clear_inode(struct inode *inode) WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); - nfs_fscache_release_inode_cookie(inode); + nfs_fscache_clear_inode(inode); } EXPORT_SYMBOL_GPL(nfs_clear_inode); void nfs_evict_inode(struct inode *inode) { - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); nfs_clear_inode(inode); } @@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity |= flags; + if (flags & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); +} + /* * Invalidate the local caches */ @@ -162,11 +173,17 @@ static void nfs_zap_caches_locked(struct inode *inode) memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - nfs_fscache_invalidate(inode); - } else { - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - } + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE); + } else + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE); + nfs_zap_label_cache_locked(nfsi); } void nfs_zap_caches(struct inode *inode) @@ -180,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; - nfs_fscache_invalidate(inode); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); spin_unlock(&inode->i_lock); } } @@ -202,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache); void nfs_invalidate_atime(struct inode *inode) { spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_invalidate_atime); @@ -237,6 +253,8 @@ nfs_find_actor(struct inode *inode, void *opaque) if (NFS_FILEID(inode) != fattr->fileid) return 0; + if ((S_IFMT & inode->i_mode) != (S_IFMT & fattr->mode)) + return 0; if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; if (is_bad_inode(inode) || NFS_STALE(inode)) @@ -255,12 +273,74 @@ nfs_init_locked(struct inode *inode, void *opaque) return 0; } +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +static void nfs_clear_label_invalid(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; + spin_unlock(&inode->i_lock); +} + +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ + int error; + + if (label == NULL) + return; + + if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { + error = security_inode_notifysecctx(inode, label->label, + label->len); + if (error) + printk(KERN_ERR "%s() %s %d " + "security_inode_notifysecctx() %d\n", + __func__, + (char *)label->label, + label->len, error); + nfs_clear_label_invalid(inode); + } +} + +struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) +{ + struct nfs4_label *label = NULL; + int minor_version = server->nfs_client->cl_minorversion; + + if (minor_version < 2) + return label; + + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + return label; + + label = kzalloc(sizeof(struct nfs4_label), flags); + if (label == NULL) + return ERR_PTR(-ENOMEM); + + label->label = kzalloc(NFS4_MAXLABELLEN, flags); + if (label->label == NULL) { + kfree(label); + return ERR_PTR(-ENOMEM); + } + label->len = NFS4_MAXLABELLEN; + + return label; +} +EXPORT_SYMBOL_GPL(nfs4_label_alloc); +#else +void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ +} +#endif +EXPORT_SYMBOL_GPL(nfs_setsecurity); + /* * This is our front-end to iget that looks up inodes by file handle * instead of inode number. */ struct inode * -nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) +nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_find_desc desc = { .fh = fh, @@ -298,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_mode = fattr->mode; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -332,8 +412,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_version = 0; inode->i_size = 0; clear_nlink(inode); - inode->i_uid = -2; - inode->i_gid = -2; + inode->i_uid = make_kuid(&init_user_ns, -2); + inode->i_gid = make_kgid(&init_user_ns, -2); inode->i_blocks = 0; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); nfsi->write_io = 0; @@ -344,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; else if (nfs_server_capable(inode, NFS_CAP_ATIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_REVAL_PAGECACHE; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE); if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); else if (nfs_server_capable(inode, NFS_CAP_NLINK)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -382,18 +462,21 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } + + nfs_setsecurity(inode, fattr, label); + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; nfsi->access_cache = RB_ROOT; - nfs_fscache_init_inode_cookie(inode); + nfs_fscache_init_inode(inode); unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + (unsigned long long)NFS_FILEID(inode), nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); @@ -431,6 +514,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; + trace_nfs_setattr_enter(inode); + /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { nfs_inode_dio_wait(inode); @@ -447,9 +532,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) NFS_PROTO(inode)->return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, fattr, attr); if (error == 0) - nfs_refresh_inode(inode, fattr); + error = nfs_refresh_inode(inode, fattr); nfs_free_fattr(fattr); out: + trace_nfs_setattr_exit(inode, error); return error; } EXPORT_SYMBOL_GPL(nfs_setattr); @@ -465,7 +551,6 @@ EXPORT_SYMBOL_GPL(nfs_setattr); */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { - loff_t oldsize; int err; err = inode_newsize_ok(inode, offset); @@ -473,11 +558,13 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) goto out; spin_lock(&inode->i_lock); - oldsize = inode->i_size; i_size_write(inode, offset); + /* Optimisation */ + if (offset == 0) + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); - truncate_pagecache(inode, oldsize, offset); + truncate_pagecache(inode, offset); out: return err; } @@ -503,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL); spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { @@ -513,12 +601,32 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); +static void nfs_request_parent_use_readdirplus(struct dentry *dentry) +{ + struct dentry *parent; + + parent = dget_parent(dentry); + nfs_force_use_readdirplus(parent->d_inode); + dput(parent); +} + +static bool nfs_need_revalidate_inode(struct inode *inode) +{ + if (NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + return true; + if (nfs_attribute_cache_expired(inode)) + return true; + return false; +} + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; + trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { nfs_inode_dio_wait(inode); @@ -540,15 +648,19 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - if (need_atime) - err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - else - err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (need_atime || nfs_need_revalidate_inode(inode)) { + struct nfs_server *server = NFS_SERVER(inode); + + if (server->caps & NFS_CAP_READDIRPLUS) + nfs_request_parent_use_readdirplus(dentry); + err = __nfs_revalidate_inode(server, inode); + } if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } out: + trace_nfs_getattr_exit(inode, err); return err; } EXPORT_SYMBOL_GPL(nfs_getattr); @@ -559,20 +671,22 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) l_ctx->lockowner.l_owner = current->files; l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); + nfs_iocounter_init(&l_ctx->io_count); } static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *pos; + struct nfs_lock_context *head = &ctx->lock_context; + struct nfs_lock_context *pos = head; - list_for_each_entry(pos, &ctx->lock_context.list, list) { + do { if (pos->lockowner.l_owner != current->files) continue; if (pos->lockowner.l_pid != current->tgid) continue; atomic_inc(&pos->count); return pos; - } + } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; } @@ -694,10 +808,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->dentry); - if (is_sync) - nfs_sb_deactive(sb); - else - nfs_sb_deactive_async(sb); + nfs_sb_deactive(sb); kfree(ctx->mdsthreshold); kfree(ctx); } @@ -712,16 +823,23 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context); * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +void nfs_inode_attach_open_context(struct nfs_open_context *ctx) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = ctx->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); - filp->private_data = get_nfs_open_context(ctx); spin_lock(&inode->i_lock); list_add(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); + +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) +{ + filp->private_data = get_nfs_open_context(ctx); + if (list_empty(&ctx->list)) + nfs_inode_attach_open_context(ctx); +} EXPORT_SYMBOL_GPL(nfs_file_set_open_context); /* @@ -747,10 +865,11 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { - struct inode *inode = filp->f_path.dentry->d_inode; struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { + struct inode *inode = ctx->dentry->d_inode; + filp->private_data = NULL; spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); @@ -771,7 +890,7 @@ int nfs_open(struct inode *inode, struct file *filp) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); return 0; } @@ -789,11 +908,14 @@ int __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { int status = -ESTALE; + struct nfs4_label *label = NULL; struct nfs_fattr *fattr = NULL; struct nfs_inode *nfsi = NFS_I(inode); - dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", - inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Lu)\n", + inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode)); + + trace_nfs_revalidate_inode_enter(inode); if (is_bad_inode(inode)) goto out; @@ -806,36 +928,48 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); - status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr); + + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (IS_ERR(label)) { + status = PTR_ERR(label); + goto out; + } + + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, label); if (status != 0) { - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); + (unsigned long long)NFS_FILEID(inode), status); if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); } - goto out; + goto err_out; } status = nfs_refresh_inode(inode, fattr); if (status) { - dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), status); - goto out; + (unsigned long long)NFS_FILEID(inode), status); + goto err_out; } if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", + nfs_setsecurity(inode, fattr, label); + + dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); + (unsigned long long)NFS_FILEID(inode)); - out: +err_out: + nfs4_label_free(label); +out: nfs_free_fattr(fattr); + trace_nfs_revalidate_inode_exit(inode, status); return status; } @@ -846,7 +980,7 @@ int nfs_attribute_timeout(struct inode *inode) return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -static int nfs_attribute_cache_expired(struct inode *inode) +int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) return 0; @@ -862,8 +996,7 @@ static int nfs_attribute_cache_expired(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) - && !nfs_attribute_cache_expired(inode)) + if (!nfs_need_revalidate_inode(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); } @@ -872,21 +1005,29 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - + int ret; + if (mapping->nrpages != 0) { - int ret = invalidate_inode_pages2(mapping); + if (S_ISREG(inode->i_mode)) { + ret = nfs_sync_mapping(mapping); + if (ret < 0) + return ret; + } + ret = invalidate_inode_pages2(mapping); if (ret < 0) return ret; } - spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { + spin_lock(&inode->i_lock); memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); + } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, (long long)NFS_FILEID(inode)); + + dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n", + inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode)); return 0; } @@ -907,6 +1048,7 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode) int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); + unsigned long *bitlock = &nfsi->flags; int ret = 0; /* swapfiles are not supposed to be shared. */ @@ -918,8 +1060,46 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) if (ret < 0) goto out; } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping(inode, mapping); + + /* + * We must clear NFS_INO_INVALID_DATA first to ensure that + * invalidations that come in while we're shooting down the mappings + * are respected. But, that leaves a race window where one revalidator + * can clear the flag, and then another checks it before the mapping + * gets invalidated. Fix that by serializing access to this part of + * the function. + * + * At the same time, we need to allow other tasks to see whether we + * might be in the middle of invalidating the pages, so we only set + * the bit lock here if it looks like we're going to be doing that. + */ + for (;;) { + ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + if (ret) + goto out; + spin_lock(&inode->i_lock); + if (test_bit(NFS_INO_INVALIDATING, bitlock)) { + spin_unlock(&inode->i_lock); + continue; + } + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + break; + spin_unlock(&inode->i_lock); + goto out; + } + + set_bit(NFS_INO_INVALIDATING, bitlock); + smp_wmb(); + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + trace_nfs_invalidate_mapping_enter(inode); + ret = nfs_invalidate_mapping(inode, mapping); + trace_nfs_invalidate_mapping_exit(inode, ret); + + clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_INO_INVALIDATING); out: return ret; } @@ -934,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr && inode->i_version == fattr->pre_change_attr) { inode->i_version = fattr->change_attr; if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); ret |= NFS_INO_INVALID_ATTR; } /* If we have atomic WCC data, we may update some attributes */ @@ -950,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); ret |= NFS_INO_INVALID_ATTR; } if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) @@ -961,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr ret |= NFS_INO_INVALID_ATTR; } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - nfs_fscache_invalidate(inode); - return ret; } @@ -1009,9 +1186,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Have any file permissions changed? */ if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; - if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; - if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ @@ -1022,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) - nfsi->cache_validity |= invalid; + nfs_set_cache_invalid(inode, invalid); nfsi->read_cache_jiffies = fattr->time_start; return 0; @@ -1099,8 +1276,9 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) { /* wireshark uses 32-bit AUTODIN crc and does a bitwise * not on the result */ - return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); + return nfs_fhandle_hash(fh); } +EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash); /* * _nfs_display_fhandle - display an NFS file handle on the console @@ -1145,6 +1323,7 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) } } } +EXPORT_SYMBOL_GPL(_nfs_display_fhandle); #endif /** @@ -1176,11 +1355,35 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } +/* + * Don't trust the change_attribute, mtime, ctime or size if + * a pnfs LAYOUTCOMMIT is outstanding + */ +static void nfs_inode_attrs_handle_layoutcommit(struct inode *inode, + struct nfs_fattr *fattr) +{ + if (pnfs_layoutcommit_outstanding(inode)) + fattr->valid &= ~(NFS_ATTR_FATTR_CHANGE | + NFS_ATTR_FATTR_MTIME | + NFS_ATTR_FATTR_CTIME | + NFS_ATTR_FATTR_SIZE); +} + static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { + int ret; + + trace_nfs_refresh_inode_enter(inode); + + nfs_inode_attrs_handle_layoutcommit(inode, fattr); + if (nfs_inode_attrs_need_update(inode, fattr)) - return nfs_update_inode(inode, fattr); - return nfs_check_inode_attributes(inode, fattr); + ret = nfs_update_inode(inode, fattr); + else + ret = nfs_check_inode_attributes(inode, fattr); + + trace_nfs_refresh_inode_exit(inode, ret); + return ret; } /** @@ -1209,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode); static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); + unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) { - nfsi->cache_validity |= NFS_INO_INVALID_DATA; - nfs_fscache_invalidate(inode); - } + if (S_ISDIR(inode->i_mode)) + invalid |= NFS_INO_INVALID_DATA; + nfs_set_cache_invalid(inode, invalid); if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); @@ -1242,6 +1443,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) spin_lock(&inode->i_lock); status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); + return status; } EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); @@ -1319,7 +1521,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); @@ -1340,7 +1542,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ - printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", + printk(KERN_DEBUG "NFS: %s: inode %lu mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); goto out_err; } @@ -1381,18 +1583,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_version = fattr->change_attr; } } else if (server->caps & NFS_CAP_CHANGE_ATTR) - invalid |= save_cache_validity; + nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ @@ -1402,10 +1606,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) || - new_isize > cur_isize) { + if ((nfsi->npages == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid &= ~NFS_INO_REVAL_PAGECACHE; } dprintk("NFS: isize change on server for file %s/%ld " "(%Ld to %Ld)\n", @@ -1415,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)new_isize); } } else - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); @@ -1423,7 +1628,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); else if (server->caps & NFS_CAP_ATIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { @@ -1434,29 +1640,32 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } } else if (server->caps & NFS_CAP_MODE) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_OWNER) { - if (inode->i_uid != fattr->uid) { + if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } } else if (server->caps & NFS_CAP_OWNER) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_GROUP) { - if (inode->i_gid != fattr->gid) { + if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } } else if (server->caps & NFS_CAP_OWNER_GROUP) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1469,7 +1678,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); } } else if (server->caps & NFS_CAP_NLINK) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1501,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid &= ~NFS_INO_INVALID_DATA; if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || (save_cache_validity & NFS_INO_REVAL_FORCED)) - nfsi->cache_validity |= invalid; - - if (invalid & NFS_INO_INVALID_DATA) - nfs_fscache_invalidate(inode); + nfs_set_cache_invalid(inode, invalid); return 0; out_err: @@ -1525,10 +1732,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb) return NULL; nfsi->flags = 0UL; nfsi->cache_validity = 0UL; -#ifdef CONFIG_NFS_V3_ACL - nfsi->acl_access = ERR_PTR(-EAGAIN); - nfsi->acl_default = ERR_PTR(-EAGAIN); -#endif #if IS_ENABLED(CONFIG_NFS_V4) nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ @@ -1637,12 +1840,11 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { nfs_clients_init(net); - return nfs_dns_resolver_cache_init(net); + return 0; } static void nfs_net_exit(struct net *net) { - nfs_dns_resolver_cache_destroy(net); nfs_cleanup_cb_ident_idr(net); } @@ -1660,10 +1862,6 @@ static int __init init_nfs_fs(void) { int err; - err = nfs_dns_resolver_init(); - if (err < 0) - goto out10;; - err = register_pernet_subsys(&nfs_net_ops); if (err < 0) goto out9; @@ -1729,8 +1927,6 @@ out7: out8: unregister_pernet_subsys(&nfs_net_ops); out9: - nfs_dns_resolver_destroy(); -out10: return err; } @@ -1743,7 +1939,6 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); - nfs_dns_resolver_destroy(); #ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); #endif diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f0e6c7df1a0..f415cbf9f6c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -5,6 +5,7 @@ #include "nfs4_fs.h" #include <linux/mount.h> #include <linux/security.h> +#include <linux/crc32.h> #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) @@ -87,8 +88,8 @@ struct nfs_parsed_mount_data { unsigned int namlen; unsigned int options; unsigned int bsize; - unsigned int auth_flavor_len; - rpc_authflavor_t auth_flavors[1]; + struct nfs_auth_info auth_info; + rpc_authflavor_t selected_flavor; char *client_address; unsigned int version; unsigned int minorversion; @@ -153,6 +154,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, rpc_authflavor_t); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); +void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); @@ -165,7 +167,7 @@ extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, - struct nfs4_sessionid *); + struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, struct nfs_subversion *); extern struct nfs_server *nfs4_create_server( @@ -173,6 +175,9 @@ extern struct nfs_server *nfs4_create_server( struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); +extern int nfs4_update_server(struct nfs_server *server, const char *hostname, + struct sockaddr *sap, size_t salen, + struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, @@ -185,6 +190,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans); +extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, + struct inode *); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -224,11 +231,26 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); -extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); +int nfs_iocounter_wait(struct nfs_io_counter *c); + +extern const struct nfs_pageio_ops nfs_pgio_rw_ops; +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); +void nfs_rw_header_free(struct nfs_pgio_header *); +void nfs_pgio_data_release(struct nfs_pgio_data *); +int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, + const struct rpc_call_ops *, int, int); +void nfs_free_request(struct nfs_page *req); + +static inline void nfs_iocounter_init(struct nfs_io_counter *c) +{ + c->flags = 0; + atomic_set(&c->io_count, 0); +} /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; @@ -248,6 +270,7 @@ extern int nfs4_decode_dirent(struct xdr_stream *, #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; extern const u32 nfs41_maxwrite_overhead; +extern const u32 nfs41_maxgetdevinfo_overhead; #endif /* nfs4proc.c */ @@ -255,15 +278,42 @@ extern const u32 nfs41_maxwrite_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); +static inline void nfs4_label_free(struct nfs4_label *label) +{ + if (label) { + kfree(label->label); + kfree(label); + } + return; +} + +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ + if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) + nfsi->cache_validity |= NFS_INO_INVALID_LABEL; +} +#else +static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } +static inline void nfs4_label_free(void *label) {} +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ +} +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ + /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour); + const char *ip_addr); /* dir.c */ -extern int nfs_access_cache_shrinker(struct shrinker *shrink, - struct shrink_control *sc); +extern void nfs_force_use_readdirplus(struct inode *dir); +extern unsigned long nfs_access_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); int nfs_create(struct inode *, struct dentry *, umode_t, bool); int nfs_mkdir(struct inode *, struct dentry *, umode_t); @@ -278,16 +328,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *) int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); loff_t nfs_file_llseek(struct file *, loff_t, int); int nfs_file_flush(struct file *, fl_owner_t); -ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int nfs_file_mmap(struct file *, struct vm_area_struct *); -ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); -ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, - size_t, unsigned int); int nfs_check_flags(int); int nfs_setlease(struct file *, long, struct file_lock **); @@ -310,6 +358,7 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); @@ -329,7 +378,6 @@ extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); -extern void nfs_sb_deactive_async(struct super_block *sb); /* namespace.c */ #define NFS_PATH_CANONICAL 1 @@ -348,24 +396,16 @@ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, const char *); -extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); +extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif struct nfs_pgio_completion_ops; /* read.c */ -extern struct nfs_read_header *nfs_readhdr_alloc(void); -extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); -extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_readdata_release(struct nfs_read_data *rdata); /* super.c */ void nfs_clone_super(struct super_block *, struct nfs_mount_info *); @@ -380,19 +420,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern struct nfs_write_header *nfs_writehdr_alloc(void); -extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); -extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); -extern int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, @@ -405,6 +436,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct nfs_commit_info *cinfo); int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max); +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, @@ -423,6 +455,8 @@ void nfs_request_remove_commit_list(struct nfs_page *req, void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq); +int nfs_key_timeout_notify(struct file *filp, struct inode *inode); +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, @@ -431,6 +465,13 @@ extern int nfs_migrate_page(struct address_space *, #define nfs_migrate_page NULL #endif +/* unlink.c */ +extern struct rpc_task * +nfs_async_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, struct dentry *new_dentry, + void (*complete)(struct rpc_task *, struct nfs_renamedata *)); +extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); + /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); @@ -441,11 +482,10 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_read_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_data *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour); + const char *ip_addr); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); @@ -568,3 +608,22 @@ u64 nfs_timespec_to_change_attr(const struct timespec *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } + +#ifdef CONFIG_CRC32 +/** + * nfs_fhandle_hash - calculate the crc32 hash for the filehandle + * @fh - pointer to filehandle + * + * returns a crc32 hash for the filehandle that is compatible with + * the one displayed by "wireshark". + */ +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); +} +#else +static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) +{ + return 0; +} +#endif diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 91a6faf811a..99a45283b9e 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -139,7 +139,10 @@ struct mnt_fhstatus { * nfs_mount - Obtain an NFS file handle for the given host and path * @info: pointer to mount request arguments * - * Uses default timeout parameters specified by underlying transport. + * Uses default timeout parameters specified by underlying transport. On + * successful return, the auth_flavs list and auth_flav_len will be populated + * with the list from the server or a faked-up list if the server didn't + * provide one. */ int nfs_mount(struct nfs_mount_request *info) { @@ -195,6 +198,15 @@ int nfs_mount(struct nfs_mount_request *info) dprintk("NFS: MNT request succeeded\n"); status = 0; + /* + * If the server didn't provide a flavor list, allow the + * client to try any flavor. + */ + if (info->version != NFS_MNT3_VERSION || *info->auth_flav_len == 0) { + dprintk("NFS: Faking up auth_flavs list\n"); + info->auth_flavs[0] = RPC_AUTH_NULL; + *info->auth_flav_len = 1; + } out: return status; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index dd057bc6b65..b5a0afc3ee1 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -177,11 +177,31 @@ out_nofree: return mnt; } +static int +nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_getattr(mnt, dentry, stat); + generic_fillattr(dentry->d_inode, stat); + return 0; +} + +static int +nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_setattr(dentry, attr); + return -EACCES; +} + const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, + .setattr = nfs_setattr, }; const struct inode_operations nfs_referral_inode_operations = { + .getattr = nfs_namespace_getattr, + .setattr = nfs_namespace_setattr, }; static void nfs_expire_automounts(struct work_struct *work) @@ -233,9 +253,8 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, dprintk("--> nfs_do_submount()\n"); - dprintk("%s: submounting on %s/%s\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("%s: submounting on %pd2\n", __func__, + dentry); if (page == NULL) goto out; devname = nfs_devname(dentry, page, PAGE_SIZE); @@ -260,7 +279,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct dentry *parent = dget_parent(dentry); /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL); dput(parent); if (err != 0) return ERR_PTR(err); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 06b9df49f7f..5f61b83f4a1 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) /* * typedef opaque nfsdata<>; */ -static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) +static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) { u32 recvd, count; __be32 *p; @@ -290,8 +290,13 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) fattr->mode = be32_to_cpup(p++); fattr->nlink = be32_to_cpup(p++); - fattr->uid = be32_to_cpup(p++); - fattr->gid = be32_to_cpup(p++); + fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++)); + if (!uid_valid(fattr->uid)) + goto out_uid; + fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++)); + if (!gid_valid(fattr->gid)) + goto out_gid; + fattr->size = be32_to_cpup(p++); fattr->du.nfs2.blocksize = be32_to_cpup(p++); @@ -313,6 +318,12 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); return 0; +out_uid: + dprintk("NFS: returned invalid uid\n"); + return -EINVAL; +out_gid: + dprintk("NFS: returned invalid gid\n"); + return -EINVAL; out_overflow: print_overflow_msg(__func__, xdr); return -EIO; @@ -351,11 +362,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr) else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); if (attr->ia_valid & ATTR_UID) - *p++ = cpu_to_be32(attr->ia_uid); + *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid)); else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); if (attr->ia_valid & ATTR_GID) - *p++ = cpu_to_be32(attr->ia_gid); + *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid)); else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); if (attr->ia_valid & ATTR_SIZE) @@ -602,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, * }; */ static void encode_readargs(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -618,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_readargs(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -638,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, * }; */ static void encode_writeargs(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -658,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_writeargs(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; @@ -846,7 +857,7 @@ out_default: * }; */ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -867,7 +878,7 @@ out_default: } static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { /* All NFSv2 writes are "file sync" writes */ result->verf->committed = NFS_FILE_SYNC; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 4a1aafba6a2..8f854dde415 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -10,179 +10,7 @@ #define NFSDBG_FACILITY NFSDBG_PROC -ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl; - int pos=0, len=0; - -# define output(s) do { \ - if (pos + sizeof(s) <= size) { \ - memcpy(buffer + pos, s, sizeof(s)); \ - pos += sizeof(s); \ - } \ - len += sizeof(s); \ - } while(0) - - acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - output("system.posix_acl_access"); - posix_acl_release(acl); - } - - if (S_ISDIR(inode->i_mode)) { - acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - output("system.posix_acl_default"); - posix_acl_release(acl); - } - } - -# undef output - - if (!buffer || len <= size) - return len; - return -ERANGE; -} - -ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl; - int type, error = 0; - - if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) - type = ACL_TYPE_ACCESS; - else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) - type = ACL_TYPE_DEFAULT; - else - return -EOPNOTSUPP; - - acl = nfs3_proc_getacl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - else if (acl) { - if (type == ACL_TYPE_ACCESS && acl->a_count == 0) - error = -ENODATA; - else - error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - } else - error = -ENODATA; - - return error; -} - -int nfs3_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl; - int type, error; - - if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) - type = ACL_TYPE_ACCESS; - else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) - type = ACL_TYPE_DEFAULT; - else - return -EOPNOTSUPP; - - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - error = nfs3_proc_setacl(inode, type, acl); - posix_acl_release(acl); - - return error; -} - -int nfs3_removexattr(struct dentry *dentry, const char *name) -{ - struct inode *inode = dentry->d_inode; - int type; - - if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) - type = ACL_TYPE_ACCESS; - else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) - type = ACL_TYPE_DEFAULT; - else - return -EOPNOTSUPP; - - return nfs3_proc_setacl(inode, type, NULL); -} - -static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) -{ - if (!IS_ERR(nfsi->acl_access)) { - posix_acl_release(nfsi->acl_access); - nfsi->acl_access = ERR_PTR(-EAGAIN); - } - if (!IS_ERR(nfsi->acl_default)) { - posix_acl_release(nfsi->acl_default); - nfsi->acl_default = ERR_PTR(-EAGAIN); - } -} - -void nfs3_forget_cached_acls(struct inode *inode) -{ - dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, - inode->i_ino); - spin_lock(&inode->i_lock); - __nfs3_forget_cached_acls(NFS_I(inode)); - spin_unlock(&inode->i_lock); -} - -static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct posix_acl *acl = ERR_PTR(-EINVAL); - - spin_lock(&inode->i_lock); - switch(type) { - case ACL_TYPE_ACCESS: - acl = nfsi->acl_access; - break; - - case ACL_TYPE_DEFAULT: - acl = nfsi->acl_default; - break; - - default: - goto out; - } - if (IS_ERR(acl)) - acl = ERR_PTR(-EAGAIN); - else - acl = posix_acl_dup(acl); -out: - spin_unlock(&inode->i_lock); - dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, - inode->i_ino, type, acl); - return acl; -} - -static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, - inode->i_ino, acl, dfacl); - spin_lock(&inode->i_lock); - __nfs3_forget_cached_acls(NFS_I(inode)); - if (!IS_ERR(acl)) - nfsi->acl_access = posix_acl_dup(acl); - if (!IS_ERR(dfacl)) - nfsi->acl_default = posix_acl_dup(dfacl); - spin_unlock(&inode->i_lock); -} - -struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +struct posix_acl *nfs3_get_acl(struct inode *inode, int type) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFSACL_MAXPAGES] = { }; @@ -198,7 +26,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) .rpc_argp = &args, .rpc_resp = &res, }; - struct posix_acl *acl; int status, count; if (!nfs_server_capable(inode, NFS_CAP_ACLS)) @@ -207,10 +34,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) status = nfs_revalidate_inode(server, inode); if (status < 0) return ERR_PTR(status); - acl = nfs3_get_cached_acl(inode, type); - if (acl != ERR_PTR(-EAGAIN)) - return acl; - acl = NULL; /* * Only get the access acl when explicitly requested: We don't @@ -257,40 +80,41 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) } if (res.acl_access != NULL) { - if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) || + res.acl_access->a_count == 0) { posix_acl_release(res.acl_access); res.acl_access = NULL; } } - nfs3_cache_acls(inode, - (res.mask & NFS_ACL) ? res.acl_access : ERR_PTR(-EINVAL), - (res.mask & NFS_DFACL) ? res.acl_default : ERR_PTR(-EINVAL)); - switch(type) { - case ACL_TYPE_ACCESS: - acl = res.acl_access; - res.acl_access = NULL; - break; + if (res.mask & NFS_ACL) + set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access); + else + forget_cached_acl(inode, ACL_TYPE_ACCESS); - case ACL_TYPE_DEFAULT: - acl = res.acl_default; - res.acl_default = NULL; + if (res.mask & NFS_DFACL) + set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default); + else + forget_cached_acl(inode, ACL_TYPE_DEFAULT); + + nfs_free_fattr(res.fattr); + if (type == ACL_TYPE_ACCESS) { + posix_acl_release(res.acl_default); + return res.acl_access; + } else { + posix_acl_release(res.acl_access); + return res.acl_default; } getout: posix_acl_release(res.acl_access); posix_acl_release(res.acl_default); nfs_free_fattr(res.fattr); - - if (status != 0) { - posix_acl_release(acl); - acl = ERR_PTR(status); - } - return acl; + return ERR_PTR(status); } -static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl) +static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_fattr *fattr; @@ -353,7 +177,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, switch (status) { case 0: status = nfs_refresh_inode(inode, fattr); - nfs3_cache_acls(inode, acl, dfacl); + set_cached_acl(inode, ACL_TYPE_ACCESS, acl); + set_cached_acl(inode, ACL_TYPE_DEFAULT, dfacl); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: @@ -373,40 +198,43 @@ out: return status; } -int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + int ret; + ret = __nfs3_proc_setacls(inode, acl, dfacl); + return (ret == -EOPNOTSUPP) ? 0 : ret; + +} + +int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct posix_acl *alloc = NULL, *dfacl = NULL; int status; if (S_ISDIR(inode->i_mode)) { switch(type) { - case ACL_TYPE_ACCESS: - alloc = dfacl = nfs3_proc_getacl(inode, - ACL_TYPE_DEFAULT); - if (IS_ERR(alloc)) - goto fail; - break; - - case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = nfs3_proc_getacl(inode, - ACL_TYPE_ACCESS); - if (IS_ERR(alloc)) - goto fail; - break; - - default: - return -EINVAL; + case ACL_TYPE_ACCESS: + alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; } - } else if (type != ACL_TYPE_ACCESS) - return -EINVAL; + } if (acl == NULL) { alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; } - status = nfs3_proc_setacls(inode, acl, dfacl); + status = __nfs3_proc_setacls(inode, acl, dfacl); posix_acl_release(alloc); return status; @@ -414,27 +242,51 @@ fail: return PTR_ERR(alloc); } -int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, - umode_t mode) +const struct xattr_handler *nfs3_xattr_handlers[] = { + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, + NULL, +}; + +static int +nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, + size_t size, ssize_t *result) { - struct posix_acl *dfacl, *acl; - int error = 0; + struct posix_acl *acl; + char *p = data + *result; - dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(dfacl)) { - error = PTR_ERR(dfacl); - return (error == -EOPNOTSUPP) ? 0 : error; - } - if (!dfacl) + acl = get_acl(inode, type); + if (!acl) return 0; - acl = posix_acl_dup(dfacl); - error = posix_acl_create(&acl, GFP_KERNEL, &mode); - if (error < 0) - goto out_release_dfacl; - error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? - dfacl : NULL); + posix_acl_release(acl); -out_release_dfacl: - posix_acl_release(dfacl); - return error; + + *result += strlen(name); + *result += 1; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +nfs3_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct inode *inode = dentry->d_inode; + ssize_t result = 0; + int error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS, + POSIX_ACL_XATTR_ACCESS, data, size, &result); + if (error) + return error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT, + POSIX_ACL_XATTR_DEFAULT, data, size, &result); + if (error) + return error; + return result; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 70efb63b1e4..f0afa291fd5 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -18,6 +18,7 @@ #include <linux/lockd/bind.h> #include <linux/nfs_mount.h> #include <linux/freezer.h> +#include <linux/xattr.h> #include "iostat.h" #include "internal.h" @@ -33,7 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; @@ -98,7 +99,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, struct nfs4_label *label) { struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR], @@ -143,7 +144,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, static int nfs3_proc_lookup(struct inode *dir, struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + struct nfs4_label *label) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), @@ -300,7 +302,7 @@ static int nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_ status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); nfs_post_op_update_inode(dir, data->res.dir_attr); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); return status; } @@ -316,11 +318,11 @@ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { + struct posix_acl *default_acl, *acl; struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -335,11 +337,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, data->arg.create.createmode = NFS3_CREATE_UNCHECKED; if (flags & O_EXCL) { data->arg.create.createmode = NFS3_CREATE_EXCLUSIVE; - data->arg.create.verifier[0] = jiffies; - data->arg.create.verifier[1] = current->pid; + data->arg.create.verifier[0] = cpu_to_be32(jiffies); + data->arg.create.verifier[1] = cpu_to_be32(current->pid); } - sattr->ia_mode &= ~current_umask(); + status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl); + if (status) + goto out; for (;;) { status = nfs3_do_create(dir, dentry, data); @@ -365,7 +369,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } if (status != 0) - goto out; + goto out_release_acls; /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. */ @@ -384,9 +388,14 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_post_op_update_inode(dentry->d_inode, data->res.fattr); dprintk("NFS reply setattr (post-create): %d\n", status); if (status != 0) - goto out; + goto out_release_acls; } - status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); + + status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl); + +out_release_acls: + posix_acl_release(acl); + posix_acl_release(default_acl); out: nfs3_free_createdata(data); dprintk("NFS reply create: %d\n", status); @@ -470,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, } static int -nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .old_name = old_name, - .new_dir = NFS_FH(new_dir), - .new_name = new_name, - }; - struct nfs_renameres res; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status = -ENOMEM; - - dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - - res.old_fattr = nfs_alloc_fattr(); - res.new_fattr = nfs_alloc_fattr(); - if (res.old_fattr == NULL || res.new_fattr == NULL) - goto out; - - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_post_op_update_inode(old_dir, res.old_fattr); - nfs_post_op_update_inode(new_dir, res.new_fattr); -out: - nfs_free_fattr(res.old_fattr); - nfs_free_fattr(res.new_fattr); - dprintk("NFS reply rename: %d\n", status); - return status; -} - -static int nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { struct nfs3_linkargs arg = { @@ -547,7 +521,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) @@ -571,18 +545,20 @@ out: static int nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { + struct posix_acl *default_acl, *acl; struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); - - sattr->ia_mode &= ~current_umask(); + dprintk("NFS call mkdir %pd\n", dentry); data = nfs3_alloc_createdata(); if (data == NULL) goto out; + status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl); + if (status) + goto out; + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR]; data->arg.mkdir.fh = NFS_FH(dir); data->arg.mkdir.name = dentry->d_name.name; @@ -591,9 +567,13 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) status = nfs3_do_create(dir, dentry, data); if (status != 0) - goto out; + goto out_release_acls; - status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); + status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl); + +out_release_acls: + posix_acl_release(acl); + posix_acl_release(default_acl); out: nfs3_free_createdata(data); dprintk("NFS reply mkdir: %d\n", status); @@ -690,19 +670,21 @@ static int nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { + struct posix_acl *default_acl, *acl; struct nfs3_createdata *data; - umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, + dprintk("NFS call mknod %pd %u:%u\n", dentry, MAJOR(rdev), MINOR(rdev)); - sattr->ia_mode &= ~current_umask(); - data = nfs3_alloc_createdata(); if (data == NULL) goto out; + status = posix_acl_create(dir, &sattr->ia_mode, &default_acl, &acl); + if (status) + goto out; + data->msg.rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD]; data->arg.mknod.fh = NFS_FH(dir); data->arg.mknod.name = dentry->d_name.name; @@ -730,8 +712,13 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = nfs3_do_create(dir, dentry, data); if (status != 0) - goto out; - status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); + goto out_release_acls; + + status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl); + +out_release_acls: + posix_acl_release(acl); + posix_acl_release(default_acl); out: nfs3_free_createdata(data); dprintk("NFS reply mknod: %d\n", status); @@ -808,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -820,17 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); + return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -841,16 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - rpc_call_start(task); -} - static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { rpc_call_start(task); @@ -872,7 +855,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } @@ -901,20 +884,28 @@ static const struct inode_operations nfs3_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, +#ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .removexattr = generic_removexattr, + .get_acl = nfs3_get_acl, + .set_acl = nfs3_set_acl, +#endif }; static const struct inode_operations nfs3_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, +#ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, - .getxattr = nfs3_getxattr, - .setxattr = nfs3_setxattr, - .removexattr = nfs3_removexattr, + .getxattr = generic_getxattr, + .setxattr = generic_setxattr, + .removexattr = generic_removexattr, + .get_acl = nfs3_get_acl, + .set_acl = nfs3_set_acl, +#endif }; const struct nfs_rpc_ops nfs_v3_clientops = { @@ -936,7 +927,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .unlink_setup = nfs3_proc_unlink_setup, .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, .unlink_done = nfs3_proc_unlink_done, - .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, .rename_done = nfs3_proc_rename_done, @@ -950,19 +940,16 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .fsinfo = nfs3_proc_fsinfo, .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, + .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare, .read_setup = nfs3_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, - .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, - .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, .commit_done = nfs3_commit_done, .lock = nfs3_proc_lock, - .clear_acl_cache = nfs3_forget_cached_acls, + .clear_acl_cache = forget_all_cached_acls, .close_context = nfs_close_context, .have_delegation = nfs3_have_delegation, .return_delegation = nfs3_return_delegation, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index cc471c72523..d6a98949af1 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -12,6 +12,9 @@ static struct nfs_subversion nfs_v3 = { .rpc_vers = &nfs_version3, .rpc_ops = &nfs_v3_clientops, .sops = &nfs_sops, +#ifdef CONFIG_NFS_V3_ACL + .xattr = nfs3_xattr_handlers, +#endif }; static int __init init_nfs_v3(void) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index bffc32406fb..8f4cbe7f4aa 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -592,13 +592,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr) if (attr->ia_valid & ATTR_UID) { *p++ = xdr_one; - *p++ = cpu_to_be32(attr->ia_uid); + *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid)); } else *p++ = xdr_zero; if (attr->ia_valid & ATTR_GID) { *p++ = xdr_one; - *p++ = cpu_to_be32(attr->ia_gid); + *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid)); } else *p++ = xdr_zero; @@ -657,8 +657,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode; fattr->nlink = be32_to_cpup(p++); - fattr->uid = be32_to_cpup(p++); - fattr->gid = be32_to_cpup(p++); + fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++)); + if (!uid_valid(fattr->uid)) + goto out_uid; + fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++)); + if (!gid_valid(fattr->gid)) + goto out_gid; p = xdr_decode_size3(p, &fattr->size); p = xdr_decode_size3(p, &fattr->du.nfs3.used); @@ -675,6 +679,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) fattr->valid |= NFS_ATTR_FATTR_V3; return 0; +out_uid: + dprintk("NFS: returned invalid uid\n"); + return -EINVAL; +out_gid: + dprintk("NFS: returned invalid gid\n"); + return -EINVAL; out_overflow: print_overflow_msg(__func__, xdr); return -EIO; @@ -943,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, * }; */ static void encode_read3args(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -956,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_read3args(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -982,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, * }; */ static void encode_write3args(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -998,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_write3args(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; @@ -1579,7 +1589,7 @@ out_default: * }; */ static int decode_read3resok(struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { u32 eof, count, ocount, recvd; __be32 *p; @@ -1615,7 +1625,7 @@ out_overflow: } static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -1663,7 +1673,7 @@ out_status: * }; */ static int decode_write3resok(struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { __be32 *p; @@ -1687,7 +1697,7 @@ out_eio: } static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a3f488b074a..ba2affa5194 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,10 +9,20 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H +#if defined(CONFIG_NFS_V4_2) +#define NFS4_MAX_MINOR_VERSION 2 +#elif defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MINOR_VERSION 1 +#else +#define NFS4_MAX_MINOR_VERSION 0 +#endif + #if IS_ENABLED(CONFIG_NFS_V4) #define NFS4_MAX_LOOP_ON_RECOVER (10) +#include <linux/seqlock.h> + struct idmap; enum nfs4_client_state { @@ -27,6 +37,8 @@ enum nfs4_client_state { NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, NFS4CLNT_BIND_CONN_TO_SESSION, + NFS4CLNT_MOVED, + NFS4CLNT_LEASE_MOVED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -34,19 +46,21 @@ enum nfs4_client_state { struct nfs4_minor_version_ops { u32 minor_version; + unsigned init_caps; - int (*call_sync)(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res); + int (*init_client)(struct nfs_client *); + void (*shutdown_client)(struct nfs_client *); bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + int (*free_lock_state)(struct nfs_server *, + struct nfs4_lock_state *); + const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; + const struct nfs4_mig_recovery_ops *mig_recovery_ops; }; #define NFS_SEQID_CONFIRMED 1 @@ -90,6 +104,8 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; + seqcount_t so_reclaim_seqcount; + struct mutex so_delegreturn_mutex; }; enum { @@ -128,6 +144,7 @@ struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 0 +#define NFS_LOCK_LOST 1 unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; @@ -139,12 +156,14 @@ struct nfs4_lock_state { enum { LK_STATE_IN_USE, NFS_DELEGATED_STATE, /* Current stateid is delegation */ + NFS_OPEN_STATE, /* OPEN stateid is set */ NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ + NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ }; struct nfs4_state { @@ -184,8 +203,7 @@ struct nfs4_state_recovery_ops { int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); int (*establish_clid)(struct nfs_client *, struct rpc_cred *); - struct rpc_cred * (*get_clid_cred)(struct nfs_client *); - int (*reclaim_complete)(struct nfs_client *); + int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *); int (*detect_trunking)(struct nfs_client *, struct nfs_client **, struct rpc_cred *); }; @@ -196,6 +214,12 @@ struct nfs4_state_maintenance_ops { int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; +struct nfs4_mig_recovery_ops { + int (*get_locations)(struct inode *, struct nfs4_fs_locations *, + struct page *, struct rpc_cred *); + int (*fsid_present)(struct inode *, struct rpc_cred *); +}; + extern const struct dentry_operations nfs4_dentry_operations; /* dir.c */ @@ -206,15 +230,16 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); -struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); +struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *); struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); +int nfs4_replace_transport(struct nfs_server *server, + const struct nfs4_fs_locations *locations); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); -extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); +extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_destroy_clientid(struct nfs_client *clp); @@ -224,11 +249,17 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); +extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, + struct page *page, struct rpc_cred *); +extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *); extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); -extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; +extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode); #if defined(CONFIG_NFS_V4_1) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -236,12 +267,10 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } -extern int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); +extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, @@ -261,18 +290,63 @@ is_ds_client(struct nfs_client *clp) { return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS; } -#else /* CONFIG_NFS_v4_1 */ -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) + +static inline bool +_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, + struct rpc_clnt **clntp, struct rpc_message *msg) { - return NULL; + struct rpc_cred *newcred = NULL; + rpc_authflavor_t flavor; + + if (test_bit(sp4_mode, &clp->cl_sp4_flags)) { + spin_lock(&clp->cl_lock); + if (clp->cl_machine_cred != NULL) + /* don't call get_rpccred on the machine cred - + * a reference will be held for life of clp */ + newcred = clp->cl_machine_cred; + spin_unlock(&clp->cl_lock); + msg->rpc_cred = newcred; + + flavor = clp->cl_rpcclient->cl_auth->au_flavor; + WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I && + flavor != RPC_AUTH_GSS_KRB5P); + *clntp = clp->cl_rpcclient; + + return true; + } + return false; } -static inline int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task) +/* + * Function responsible for determining if an rpc_message should use the + * machine cred under SP4_MACH_CRED and if so switching the credential and + * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p). + * Should be called before rpc_call_sync/rpc_call_async. + */ +static inline void +nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, + struct rpc_clnt **clntp, struct rpc_message *msg) { - rpc_call_start(task); - return 0; + _nfs4_state_protect(clp, sp4_mode, clntp, msg); +} + +/* + * Special wrapper to nfs4_state_protect for write. + * If WRITE can use machine cred but COMMIT cannot, make sure all writes + * that use machine cred use NFS_FILE_SYNC. + */ +static inline void +nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, + struct rpc_message *msg, struct nfs_pgio_data *wdata) +{ + if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && + !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) + wdata->args.stable = NFS_FILE_SYNC; +} +#else /* CONFIG_NFS_v4_1 */ +static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) +{ + return NULL; } static inline bool @@ -286,16 +360,32 @@ is_ds_client(struct nfs_client *clp) { return false; } + +static inline void +nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, + struct rpc_clnt **clntp, struct rpc_message *msg) +{ +} + +static inline void +nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, + struct rpc_message *msg, struct nfs_pgio_data *wdata) +{ +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; extern const u32 nfs4_fattr_bitmap[3]; -extern const u32 nfs4_statfs_bitmap[2]; -extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[3]; +extern const u32 nfs4_pathconf_bitmap[3]; extern const u32 nfs4_fsinfo_bitmap[3]; -extern const u32 nfs4_fs_locations_bitmap[2]; +extern const u32 nfs4_fs_locations_bitmap[3]; +void nfs40_shutdown_client(struct nfs_client *); +void nfs41_shutdown_client(struct nfs_client *); +int nfs40_init_client(struct nfs_client *); +int nfs41_init_client(struct nfs_client *); void nfs4_free_client(struct nfs_client *); struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *); @@ -307,7 +397,7 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); int nfs4_discover_server_trunking(struct nfs_client *clp, @@ -315,7 +405,6 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, int nfs40_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) -struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); int nfs41_discover_server_trunking(struct nfs_client *clp, struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); @@ -338,18 +427,21 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs_inode_find_state_and_recover(struct inode *inode, const nfs4_stateid *stateid); +extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern int nfs4_wait_clnt_recover(struct nfs_client *clp); extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); -extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); +extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); +extern int nfs4_schedule_migration_recovery(const struct nfs_server *); +extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, +extern int nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, fmode_t, const struct nfs_lockowner *); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); @@ -370,6 +462,7 @@ struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short send_implementation_id; +extern bool recover_lost_locks; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; @@ -408,10 +501,27 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei return memcmp(dst, src, sizeof(*dst)) == 0; } +static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src) +{ + return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0; +} + +static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2) +{ + return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; +} + +static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) +{ + return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; +} + #else #define nfs4_close_state(a, b) do { } while (0) #define nfs4_close_sync(a, b) do { } while (0) +#define nfs4_state_protect(a, b, c, d) do { } while (0) +#define nfs4_state_protect_write(a, b, c, d) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index acc34726812..aa9ef487604 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -6,9 +6,11 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/nfs_mount.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include "internal.h" #include "callback.h" #include "delegation.h" @@ -29,31 +31,149 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; -retry: - if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) - return -ENOMEM; + idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + if (ret >= 0) + clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); - if (ret == -EAGAIN) - goto retry; - return ret; + idr_preload_end(); + return ret < 0 ? ret : 0; } #ifdef CONFIG_NFS_V4_1 -static void nfs4_shutdown_session(struct nfs_client *clp) +/** + * Per auth flavor data server rpc clients + */ +struct nfs4_ds_server { + struct list_head list; /* ds_clp->cl_ds_clients */ + struct rpc_clnt *rpc_clnt; +}; + +/** + * Common lookup case for DS I/O + */ +static struct nfs4_ds_server * +nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor) +{ + struct nfs4_ds_server *dss; + + rcu_read_lock(); + list_for_each_entry_rcu(dss, &ds_clp->cl_ds_clients, list) { + if (dss->rpc_clnt->cl_auth->au_flavor != flavor) + continue; + goto out; + } + dss = NULL; +out: + rcu_read_unlock(); + return dss; +} + +static struct nfs4_ds_server * +nfs4_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor, + struct nfs4_ds_server *new) +{ + struct nfs4_ds_server *dss; + + spin_lock(&ds_clp->cl_lock); + list_for_each_entry(dss, &ds_clp->cl_ds_clients, list) { + if (dss->rpc_clnt->cl_auth->au_flavor != flavor) + continue; + goto out; + } + if (new) + list_add_rcu(&new->list, &ds_clp->cl_ds_clients); + dss = new; +out: + spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */ + return dss; +} + +static struct nfs4_ds_server * +nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor) +{ + struct nfs4_ds_server *dss; + + dss = kmalloc(sizeof(*dss), GFP_NOFS); + if (dss == NULL) + return ERR_PTR(-ENOMEM); + + dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor); + if (IS_ERR(dss->rpc_clnt)) { + int err = PTR_ERR(dss->rpc_clnt); + kfree (dss); + return ERR_PTR(err); + } + INIT_LIST_HEAD(&dss->list); + + return dss; +} + +static void +nfs4_free_ds_server(struct nfs4_ds_server *dss) +{ + rpc_release_client(dss->rpc_clnt); + kfree(dss); +} + +/** +* Find or create a DS rpc client with th MDS server rpc client auth flavor +* in the nfs_client cl_ds_clients list. +*/ +struct rpc_clnt * +nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode) +{ + struct nfs4_ds_server *dss, *new; + rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor; + + dss = nfs4_find_ds_client(ds_clp, flavor); + if (dss != NULL) + goto out; + new = nfs4_alloc_ds_server(ds_clp, flavor); + if (IS_ERR(new)) + return ERR_CAST(new); + dss = nfs4_add_ds_client(ds_clp, flavor, new); + if (dss != new) + nfs4_free_ds_server(new); +out: + return dss->rpc_clnt; +} +EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client); + +static void +nfs4_shutdown_ds_clients(struct nfs_client *clp) +{ + struct nfs4_ds_server *dss; + LIST_HEAD(shutdown_list); + + while (!list_empty(&clp->cl_ds_clients)) { + dss = list_entry(clp->cl_ds_clients.next, + struct nfs4_ds_server, list); + list_del(&dss->list); + rpc_shutdown_client(dss->rpc_clnt); + kfree (dss); + } +} + +void nfs41_shutdown_client(struct nfs_client *clp) { if (nfs4_has_session(clp)) { + nfs4_shutdown_ds_clients(clp); nfs4_destroy_session(clp->cl_session); nfs4_destroy_clientid(clp); } } -#else /* CONFIG_NFS_V4_1 */ -static void nfs4_shutdown_session(struct nfs_client *clp) +#endif /* CONFIG_NFS_V4_1 */ + +void nfs40_shutdown_client(struct nfs_client *clp) { + if (clp->cl_slot_tbl) { + nfs4_shutdown_slot_table(clp->cl_slot_tbl); + kfree(clp->cl_slot_tbl); + } } -#endif /* CONFIG_NFS_V4_1 */ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) { @@ -66,12 +186,19 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) if (err) goto error; + if (cl_init->minorversion > NFS4_MAX_MINOR_VERSION) { + err = -EINVAL; + goto error; + } + spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); + INIT_LIST_HEAD(&clp->cl_ds_clients); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + clp->cl_mig_gen = 1; return clp; error: @@ -92,7 +219,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state)) nfs4_kill_renewd(clp); - nfs4_shutdown_session(clp); + clp->cl_mvops->shutdown_client(clp); nfs4_destroy_callback(clp); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); @@ -139,34 +266,77 @@ static int nfs4_init_callback(struct nfs_client *clp) return 0; } +/** + * nfs40_init_client - nfs_client initialization tasks for NFSv4.0 + * @clp - nfs_client to initialize + * + * Returns zero on success, or a negative errno if some error occurred. + */ +int nfs40_init_client(struct nfs_client *clp) +{ + struct nfs4_slot_table *tbl; + int ret; + + tbl = kzalloc(sizeof(*tbl), GFP_NOFS); + if (tbl == NULL) + return -ENOMEM; + + ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, + "NFSv4.0 transport Slot table"); + if (ret) { + kfree(tbl); + return ret; + } + + clp->cl_slot_tbl = tbl; + return 0; +} + +#if defined(CONFIG_NFS_V4_1) + +/** + * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+ + * @clp - nfs_client to initialize + * + * Returns zero on success, or a negative errno if some error occurred. + */ +int nfs41_init_client(struct nfs_client *clp) +{ + struct nfs4_session *session = NULL; + + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + + /* + * The create session reply races with the server back + * channel probe. Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); + return 0; +} + +#endif /* CONFIG_NFS_V4_1 */ + /* * Initialize the minor version specific parts of an NFS4 client record */ static int nfs4_init_client_minor_version(struct nfs_client *clp) { -#if defined(CONFIG_NFS_V4_1) - if (clp->cl_mvops->minor_version) { - struct nfs4_session *session = NULL; - /* - * Create the session and mark it expired. - * When a SEQUENCE operation encounters the expired session - * it will do session recovery to initialize it. - */ - session = nfs4_alloc_session(clp); - if (!session) - return -ENOMEM; - - clp->cl_session = session; - /* - * The create session reply races with the server back - * channel probe. Mark the client NFS_CS_SESSION_INITING - * so that the client back channel can find the - * nfs_client struct - */ - nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING); - } -#endif /* CONFIG_NFS_V4_1 */ + int ret; + ret = clp->cl_mvops->init_client(clp); + if (ret) + return ret; return nfs4_init_callback(clp); } @@ -182,8 +352,7 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) */ struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour) + const char *ip_addr) { char buf[INET6_ADDRSTRLEN + 1]; struct nfs_client *old; @@ -198,8 +367,14 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* Check NFS protocol revision and initialize RPC op vector */ clp->rpc_ops = &nfs_v4_clientops; + if (clp->cl_minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - error = nfs_create_rpc_client(clp, timeparms, authflavour); + __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); + if (error == -EINVAL) + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); if (error < 0) goto error; @@ -236,14 +411,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; - if (clp != old) { - clp->cl_preserve_clid = true; - nfs_put_client(clp); - clp = old; - atomic_inc(&clp->cl_count); - } - return clp; + if (clp != old) + clp->cl_preserve_clid = true; + nfs_put_client(clp); + return old; error: nfs_mark_client_ready(clp, error); @@ -301,18 +473,32 @@ int nfs40_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; + struct nfs_client *pos, *prev = NULL; struct nfs4_setclientid_res clid = { .clientid = new->cl_clientid, .confirm = new->cl_confirm, }; - int status; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ - if (pos->cl_cons_state < NFS_CS_READY) + if (pos->cl_cons_state > NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + status = nfs_wait_client_init_complete(pos); + if (status < 0) + goto out; + status = -NFS4ERR_STALE_CLIENTID; + spin_lock(&nn->nfs_client_lock); + } + if (pos->cl_cons_state != NFS_CS_READY) continue; if (pos->rpc_ops != new->rpc_ops) @@ -332,40 +518,40 @@ int nfs40_walk_client_list(struct nfs_client *new, if (prev) nfs_put_client(prev); + prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); - if (status == 0) { + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + break; + case 0: nfs4_swap_callback_idents(pos, new); - nfs_put_client(pos); + prev = NULL; *result = pos; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - return 0; - } - if (status != -NFS4ERR_STALE_CLIENTID) { - nfs_put_client(pos); - dprintk("NFS: <-- %s status = %d, no result\n", - __func__, status); - return status; + goto out; + case -ERESTARTSYS: + case -ETIMEDOUT: + /* The callback path may have been inadvertently + * changed. Schedule recovery! + */ + nfs4_schedule_path_down_recovery(pos); + default: + goto out; } spin_lock(&nn->nfs_client_lock); - prev = pos; } + spin_unlock(&nn->nfs_client_lock); - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ + /* No match found. The server lost our clientid */ +out: if (prev) nfs_put_client(prev); - spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + return status; } #ifdef CONFIG_NFS_V4_1 @@ -431,16 +617,16 @@ int nfs41_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *n, *prev = NULL; - int error; + struct nfs_client *pos, *prev = NULL; + int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ - if (pos->cl_cons_state < NFS_CS_READY) { + if (pos->cl_cons_state > NFS_CS_READY) { atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -448,15 +634,18 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; - error = nfs_wait_client_init_complete(pos); - if (error < 0) { - nfs_put_client(pos); - spin_lock(&nn->nfs_client_lock); - continue; + status = nfs_wait_client_init_complete(pos); + if (status == 0) { + nfs4_schedule_lease_recovery(pos); + status = nfs4_wait_clnt_recover(pos); } - spin_lock(&nn->nfs_client_lock); + if (status < 0) + break; + status = -NFS4ERR_STALE_CLIENTID; } + if (pos->cl_cons_state != NFS_CS_READY) + continue; if (pos->rpc_ops != new->rpc_ops) continue; @@ -473,24 +662,20 @@ int nfs41_walk_client_list(struct nfs_client *new, if (!nfs4_match_serverowners(pos, new)) continue; - spin_unlock(&nn->nfs_client_lock); + atomic_inc(&pos->cl_count); + *result = pos; + status = 0; dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - - *result = pos; - return 0; + break; } - /* - * No matching nfs_client found. This should be impossible, - * because the new nfs_client has already been added to - * nfs_client_list by nfs_get_client(). - * - * Don't BUG(), since the caller is holding a mutex. - */ + /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); - pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); - return -NFS4ERR_STALE_CLIENTID; + dprintk("NFS: <-- %s status = %d\n", __func__, status); + if (prev) + nfs_put_client(prev); + return status; } #endif /* CONFIG_NFS_V4_1 */ @@ -555,14 +740,14 @@ static bool nfs4_cb_match_client(const struct sockaddr *addr, */ struct nfs_client * nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) + struct nfs4_sessionid *sid, u32 minorversion) { struct nfs_client *clp; struct nfs_net *nn = net_generic(net, nfs_net_id); spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 1) == false) + if (nfs4_cb_match_client(addr, clp, minorversion) == false) continue; if (!nfs4_has_session(clp)) @@ -585,7 +770,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs_client * nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, - struct nfs4_sessionid *sid) + struct nfs4_sessionid *sid, u32 minorversion) { return NULL; } @@ -619,6 +804,8 @@ static int nfs4_set_client(struct nfs_server *server, if (server->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (server->options & NFS_OPTION_MIGRATION) + set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour); @@ -709,7 +896,7 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) } static int nfs4_server_common_setup(struct nfs_server *server, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, bool auth_probe) { struct nfs_fattr *fattr; int error; @@ -723,19 +910,32 @@ static int nfs4_server_common_setup(struct nfs_server *server, return -ENOMEM; /* We must ensure the session is initialised first */ - error = nfs4_init_session(server); + error = nfs4_init_session(server->nfs_client); if (error < 0) goto out; + /* Set the basic capabilities */ + server->caps |= server->nfs_client->cl_mvops->init_caps; + if (server->flags & NFS_MOUNT_NORDIRPLUS) + server->caps &= ~NFS_CAP_READDIRPLUS; + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && + server->client->cl_auth->au_flavor == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + + /* Probe the root fh to retrieve its FSID and filehandle */ - error = nfs4_get_rootfh(server, mntfh); + error = nfs4_get_rootfh(server, mntfh, auth_probe); if (error < 0) goto out; dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); + nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); nfs4_session_set_rwsize(server); @@ -758,7 +958,7 @@ out: * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + struct nfs_parsed_mount_data *data) { struct rpc_timeout timeparms; int error; @@ -770,10 +970,16 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; server->options = data->options; + server->auth_info = data->auth_info; + + /* Use the first specified auth flavor. If this flavor isn't + * allowed by the server, use the SECINFO path to try the + * other specified flavors */ + if (data->auth_info.flavor_len >= 1) + data->selected_flavor = data->auth_info.flavors[0]; + else + data->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ error = nfs4_set_client(server, @@ -781,7 +987,7 @@ static int nfs4_init_server(struct nfs_server *server, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - data->auth_flavors[0], + data->selected_flavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -789,13 +995,6 @@ static int nfs4_init_server(struct nfs_server *server, if (error < 0) goto error; - /* - * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower - * authentication. - */ - if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) - server->caps |= NFS_CAP_UIDGID_NOMAP; - if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); if (data->wsize) @@ -808,7 +1007,8 @@ static int nfs4_init_server(struct nfs_server *server, server->port = data->nfs_server.port; - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, + data->selected_flavor); error: /* Done */ @@ -826,6 +1026,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { struct nfs_server *server; + bool auth_probe; int error; dprintk("--> nfs4_create_server()\n"); @@ -834,12 +1035,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); + auth_probe = mount_info->parsed->auth_info.flavor_len < 1; + /* set up the general RPC client */ error = nfs4_init_server(server, mount_info->parsed); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mount_info->mntfh); + error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe); if (error < 0) goto error; @@ -860,6 +1063,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; + bool auth_probe; int error; dprintk("--> nfs4_create_referral_server()\n"); @@ -873,7 +1077,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. * Note: NFSv4 always uses TCP, */ @@ -893,7 +1096,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh); + auth_probe = parent_server->auth_info.flavor_len < 1; + + error = nfs4_server_common_setup(server, mntfh, auth_probe); if (error < 0) goto error; @@ -905,3 +1110,112 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } + +/* + * Grab the destination's particulars, including lease expiry time. + * + * Returns zero if probe succeeded and retrieved FSID matches the FSID + * we have cached. + */ +static int nfs_probe_destination(struct nfs_server *server) +{ + struct inode *inode = server->super->s_root->d_inode; + struct nfs_fattr *fattr; + int error; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* Sanity: the probe won't work if the destination server + * does not recognize the migrated FH. */ + error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr); + + nfs_free_fattr(fattr); + return error; +} + +/** + * nfs4_update_server - Move an nfs_server to a different nfs_client + * + * @server: represents FSID to be moved + * @hostname: new end-point's hostname + * @sap: new end-point's socket address + * @salen: size of "sap" + * @net: net namespace + * + * The nfs_server must be quiescent before this function is invoked. + * Either its session is drained (NFSv4.1+), or its transport is + * plugged and drained (NFSv4.0). + * + * Returns zero on success, or a negative errno value. + */ +int nfs4_update_server(struct nfs_server *server, const char *hostname, + struct sockaddr *sap, size_t salen, struct net *net) +{ + struct nfs_client *clp = server->nfs_client; + struct rpc_clnt *clnt = server->client; + struct xprt_create xargs = { + .ident = clp->cl_proto, + .net = net, + .dstaddr = sap, + .addrlen = salen, + .servername = hostname, + }; + char buf[INET6_ADDRSTRLEN + 1]; + struct sockaddr_storage address; + struct sockaddr *localaddr = (struct sockaddr *)&address; + int error; + + dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + hostname); + + error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); + if (error != 0) { + dprintk("<-- %s(): rpc_switch_client_transport returned %d\n", + __func__, error); + goto out; + } + + error = rpc_localaddr(clnt, localaddr, sizeof(address)); + if (error != 0) { + dprintk("<-- %s(): rpc_localaddr returned %d\n", + __func__, error); + goto out; + } + + error = -EAFNOSUPPORT; + if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) { + dprintk("<-- %s(): rpc_ntop returned %d\n", + __func__, error); + goto out; + } + + nfs_server_remove_lists(server); + error = nfs4_set_client(server, hostname, sap, salen, buf, + clp->cl_rpcclient->cl_auth->au_flavor, + clp->cl_proto, clnt->cl_timeout, + clp->cl_minorversion, net); + nfs_put_client(clp); + if (error != 0) { + nfs_server_insert_lists(server); + dprintk("<-- %s(): nfs4_set_client returned %d\n", + __func__, error); + goto out; + } + + if (server->nfs_client->cl_hostname == NULL) + server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + nfs_server_insert_lists(server); + + error = nfs_probe_destination(server); + if (error < 0) + goto out; + + dprintk("<-- %s() succeeded\n", __func__); + +out: + return error; +} diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 08ddcccb888..a816f0627a6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) struct inode *dir; unsigned openflags = filp->f_flags; struct iattr attr; + int opened = 0; int err; /* @@ -30,9 +31,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) * -EOPENSTALE. The VFS will retry the lookup/create/open. */ - dprintk("NFS: open file(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: open file(%pd2)\n", dentry); if ((openflags & O_ACCMODE) == 3) openflags--; @@ -55,7 +54,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_wb_all(inode); } - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened); if (IS_ERR(inode)) { err = PTR_ERR(inode); switch (err) { @@ -69,13 +68,12 @@ nfs4_file_open(struct inode *inode, struct file *filp) goto out_drop; } } - iput(inode); if (inode != dentry->d_inode) goto out_drop; nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); err = 0; out_put_ctx: @@ -94,7 +92,7 @@ static int nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int ret; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); do { ret = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -102,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) break; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ + if (!ret) ret = pnfs_layoutcommit_inode(inode, true); mutex_unlock(&inode->i_mutex); /* @@ -120,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) const struct file_operations nfs4_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = nfs_file_read, + .write_iter = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs4_file_open, .flush = nfs_file_flush, @@ -132,7 +129,7 @@ const struct file_operations nfs4_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, + .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index 549462e5b9b..c0b3a16b4a0 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -9,7 +9,7 @@ #define NFSDBG_FACILITY NFSDBG_CLIENT -int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) +int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe) { struct nfs_fsinfo fsinfo; int ret = -ENOMEM; @@ -21,7 +21,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh) goto out; /* Start by getting the root filehandle from the server */ - ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo); + ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe); if (ret < 0) { dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret); goto out; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 1e09eb78543..3d83cb1fdc7 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -11,9 +11,11 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/vfs.h> #include <linux/inet.h> #include "internal.h" @@ -119,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct nfs_server *server) + struct sockaddr *sa, size_t salen, struct net *net) { - struct net *net = rpc_net_ns(server->client); ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); @@ -133,73 +134,104 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +/** + * nfs_find_best_sec - Find a security mechanism supported locally + * @server: NFS server struct + * @flavors: List of security tuples returned by SECINFO procedure + * + * Return an rpc client that uses the first security mechanism in + * "flavors" that is locally supported. The "flavors" array + * is searched in the order returned from the server, per RFC 3530 + * recommendation and each flavor is checked for membership in the + * sec= mount option list if it exists. + * + * Return -EPERM if no matching flavor is found in the array. + * + * Please call rpc_shutdown_client() when you are done with this rpc client. + * + */ +static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt, + struct nfs_server *server, + struct nfs4_secinfo_flavors *flavors) { - struct gss_api_mech *mech; - struct xdr_netobj oid; - int i; - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; + rpc_authflavor_t pflavor; + struct nfs4_secinfo4 *secinfo; + unsigned int i; for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo_flavor *flavor; - flavor = &flavors->flavors[i]; - - if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { - pseudoflavor = flavor->flavor; - break; - } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->gss.sec_oid4.len; - oid.data = flavor->gss.sec_oid4.data; - mech = gss_mech_get_by_OID(&oid); - if (!mech) - continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); - gss_mech_put(mech); - break; + secinfo = &flavors->flavors[i]; + + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + pflavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + /* does the pseudoflavor match a sec= mount opt? */ + if (pflavor != RPC_AUTH_MAXFLAVOR && + nfs_auth_info_match(&server->auth_info, pflavor)) { + struct rpc_clnt *new; + struct rpc_cred *cred; + + /* Cloning creates an rpc_auth for the flavor */ + new = rpc_clone_client_set_auth(clnt, pflavor); + if (IS_ERR(new)) + continue; + /** + * Check that the user actually can use the + * flavor. This is mostly for RPC_AUTH_GSS + * where cr_init obtains a gss context + */ + cred = rpcauth_lookupcred(new->cl_auth, 0); + if (IS_ERR(cred)) { + rpc_shutdown_client(new); + continue; + } + put_rpccred(cred); + return new; + } } } - - return pseudoflavor; + return ERR_PTR(-EPERM); } -static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) +/** + * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup, + * return an rpc_clnt that uses the best available security flavor with + * respect to the secinfo flavor list and the sec= mount options. + * + * @clnt: RPC client to clone + * @inode: directory inode + * @name: lookup name + * + * Please call rpc_shutdown_client() when you are done with this rpc client. + */ +struct rpc_clnt * +nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode, + struct qstr *name) { struct page *page; struct nfs4_secinfo_flavors *flavors; - rpc_authflavor_t flavor; + struct rpc_clnt *new; int err; page = alloc_page(GFP_KERNEL); if (!page) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + flavors = page_address(page); err = nfs4_proc_secinfo(inode, name, flavors); if (err < 0) { - flavor = err; + new = ERR_PTR(err); goto out; } - flavor = nfs_find_best_sec(flavors); + new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors); out: put_page(page); - return flavor; -} - -/* - * Please call rpc_shutdown_client() when you are done with this client. - */ -struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, - struct qstr *name) -{ - rpc_authflavor_t flavor; - - flavor = nfs4_negotiate_security(inode, name); - if ((int)flavor < 0) - return ERR_PTR((int)flavor); - - return rpc_clone_client_set_auth(clnt, flavor); + return new; } static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, @@ -207,6 +239,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; unsigned int maxbuflen; @@ -232,8 +265,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, - NFS_SB(mountdata->sb)); + mountdata->addr, addr_bufsize, net); if (mountdata->addrlen == 0) continue; @@ -276,8 +308,7 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, if (locations == NULL || locations->nlocations <= 0) goto out; - dprintk("%s: referral at %s/%s\n", __func__, - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("%s: referral at %pd2\n", __func__, dentry); page = (char *) __get_free_page(GFP_USER); if (!page) @@ -341,8 +372,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * mnt = ERR_PTR(-ENOENT); parent = dget_parent(dentry); - dprintk("%s: getting locations for %s/%s\n", - __func__, parent->d_name.name, dentry->d_name.name); + dprintk("%s: getting locations for %pd2\n", + __func__, dentry); err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); @@ -363,21 +394,130 @@ out: struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr) { + rpc_authflavor_t flavor = server->client->cl_auth->au_flavor; struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct qstr *name = &dentry->d_name; struct rpc_clnt *client; struct vfsmount *mnt; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr); + client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); dput(parent); if (IS_ERR(client)) return ERR_CAST(client); - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { mnt = nfs_do_refmount(client, dentry); - else - mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor); + goto out; + } + if (client->cl_auth->au_flavor != flavor) + flavor = client->cl_auth->au_flavor; + mnt = nfs_do_submount(dentry, fh, fattr, flavor); +out: rpc_shutdown_client(client); return mnt; } + +/* + * Try one location from the fs_locations array. + * + * Returns zero on success, or a negative errno value. + */ +static int nfs4_try_replacing_one_location(struct nfs_server *server, + char *page, char *page2, + const struct nfs4_fs_location *location) +{ + const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(server->client); + struct sockaddr *sap; + unsigned int s; + size_t salen; + int error; + + sap = kmalloc(addr_bufsize, GFP_KERNEL); + if (sap == NULL) + return -ENOMEM; + + error = -ENOENT; + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; + char *hostname; + + if (buf->len <= 0 || buf->len > PAGE_SIZE) + continue; + + if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL) + continue; + + salen = nfs_parse_server_name(buf->data, buf->len, + sap, addr_bufsize, net); + if (salen == 0) + continue; + rpc_set_port(sap, NFS_PORT); + + error = -ENOMEM; + hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); + if (hostname == NULL) + break; + + error = nfs4_update_server(server, hostname, sap, salen, net); + kfree(hostname); + if (error == 0) + break; + } + + kfree(sap); + return error; +} + +/** + * nfs4_replace_transport - set up transport to destination server + * + * @server: export being migrated + * @locations: fs_locations array + * + * Returns zero on success, or a negative errno value. + * + * The client tries all the entries in the "locations" array, in the + * order returned by the server, until one works or the end of the + * array is reached. + */ +int nfs4_replace_transport(struct nfs_server *server, + const struct nfs4_fs_locations *locations) +{ + char *page = NULL, *page2 = NULL; + int loc, error; + + error = -ENOENT; + if (locations == NULL || locations->nlocations <= 0) + goto out; + + error = -ENOMEM; + page = (char *) __get_free_page(GFP_USER); + if (!page) + goto out; + page2 = (char *) __get_free_page(GFP_USER); + if (!page2) + goto out; + + for (loc = 0; loc < locations->nlocations; loc++) { + const struct nfs4_fs_location *location = + &locations->locations[loc]; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) + continue; + + error = nfs4_try_replacing_one_location(server, page, + page2, location); + if (error == 0) + break; + } + +out: + free_page((unsigned long)page); + free_page((unsigned long)page2); + return error; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf747ef8665..4bf3d97cc5a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -66,6 +66,8 @@ #include "nfs4session.h" #include "fscache.h" +#include "nfs4trace.h" + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_POLL_RETRY_MIN (HZ/10) @@ -77,15 +79,65 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); +static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state); + struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); -static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); +static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, + struct rpc_cred *); +static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, + struct rpc_cred *); +#endif + +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +static inline struct nfs4_label * +nfs4_label_init_security(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, struct nfs4_label *label) +{ + int err; + + if (label == NULL) + return NULL; + + if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) + return NULL; + + err = security_dentry_init_security(dentry, sattr->ia_mode, + &dentry->d_name, (void **)&label->label, &label->len); + if (err == 0) + return label; + + return NULL; +} +static inline void +nfs4_label_release_security(struct nfs4_label *label) +{ + if (label) + security_release_secctx(label->label, label->len); +} +static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) +{ + if (label) + return server->attr_bitmask; + + return server->attr_bitmask_nl; +} +#else +static inline struct nfs4_label * +nfs4_label_init_security(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, struct nfs4_label *l) +{ return NULL; } +static inline void +nfs4_label_release_security(struct nfs4_label *label) +{ return; } +static inline u32 * +nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) +{ return server->attr_bitmask; } #endif + /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { @@ -93,8 +145,11 @@ static int nfs4_map_errors(int err) return err; switch (err) { case -NFS4ERR_RESOURCE: + case -NFS4ERR_LAYOUTTRYLATER: + case -NFS4ERR_RECALLCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: + case -NFS4ERR_WRONG_CRED: return -EPERM; case -NFS4ERR_BADOWNER: case -NFS4ERR_BADNAME: @@ -105,6 +160,8 @@ static int nfs4_map_errors(int err) return -EPROTONOSUPPORT; case -NFS4ERR_ACCESS: return -EACCES; + case -NFS4ERR_FILE_OPEN: + return -EBUSY; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -130,7 +187,10 @@ const u32 nfs4_fattr_bitmap[3] = { | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_METADATA - | FATTR4_WORD1_TIME_MODIFY + | FATTR4_WORD1_TIME_MODIFY, +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + FATTR4_WORD2_SECURITY_LABEL +#endif }; static const u32 nfs4_pnfs_open_bitmap[3] = { @@ -157,7 +217,7 @@ static const u32 nfs4_open_noattr_bitmap[3] = { | FATTR4_WORD0_FILEID, }; -const u32 nfs4_statfs_bitmap[2] = { +const u32 nfs4_statfs_bitmap[3] = { FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL, @@ -166,7 +226,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -const u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[3] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -181,7 +241,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD2_LAYOUT_BLKSIZE }; -const u32 nfs4_fs_locations_bitmap[2] = { +const u32 nfs4_fs_locations_bitmap[3] = { FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE @@ -197,7 +257,7 @@ const u32 nfs4_fs_locations_bitmap[2] = { | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY - | FATTR4_WORD1_MOUNTED_ON_FILEID + | FATTR4_WORD1_MOUNTED_ON_FILEID, }; static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, @@ -264,7 +324,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable_unsafe(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -293,23 +353,42 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc } if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { + nfs_remove_bad_delegation(inode); + exception->retry = 1; + break; + } if (state == NULL) break; - nfs_remove_bad_delegation(state->inode); - nfs4_schedule_stateid_recovery(server, state); + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + ret = nfs4_schedule_stateid_recovery(server, state); + if (ret < 0) + break; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; + case -NFS4ERR_MOVED: + ret = nfs4_schedule_migration_recovery(server); + if (ret < 0) + break; + goto wait_on_recovery; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -357,11 +436,27 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc return nfs4_map_errors(ret); wait_on_recovery: ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; if (ret == 0) exception->retry = 1; return ret; } +/* + * Return 'true' if 'clp' is using an rpc_client that is integrity protected + * or 'false' otherwise. + */ +static bool _nfs4_is_integrity_protected(struct nfs_client *clp) +{ + rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor; + + if (flavor == RPC_AUTH_GSS_KRB5I || + flavor == RPC_AUTH_GSS_KRB5P) + return true; + + return false; +} static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) { @@ -376,21 +471,98 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp do_renew_lease(server->nfs_client, timestamp); } +struct nfs4_call_sync_data { + const struct nfs_server *seq_server; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; +}; + +static void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ + args->sa_slot = NULL; + args->sa_cache_this = cache_reply; + args->sa_privileged = 0; + + res->sr_slot = NULL; +} + +static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +{ + args->sa_privileged = 1; +} + +static int nfs40_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; + struct nfs4_slot *slot; + + /* slot already allocated? */ + if (res->sr_slot != NULL) + goto out_start; + + spin_lock(&tbl->slot_tbl_lock); + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); + + args->sa_slot = slot; + res->sr_slot = slot; + +out_start: + rpc_call_start(task); + return 0; + +out_sleep: + if (args->sa_privileged) + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; +} + +static int nfs40_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + struct nfs4_slot *slot = res->sr_slot; + struct nfs4_slot_table *tbl; + + if (slot == NULL) + goto out; + + tbl = slot->table; + spin_lock(&tbl->slot_tbl_lock); + if (!nfs41_wake_and_assign_slot(tbl, slot)) + nfs4_free_slot(tbl, slot); + spin_unlock(&tbl->slot_tbl_lock); + + res->sr_slot = NULL; +out: + return 1; +} + #if defined(CONFIG_NFS_V4_1) static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_session *session; struct nfs4_slot_table *tbl; + struct nfs4_slot *slot = res->sr_slot; bool send_new_highest_used_slotid = false; - if (!res->sr_slot) { - /* just wake up the next guy waiting since - * we may have not consumed a slot after all */ - dprintk("%s: No slot\n", __func__); - return; - } - tbl = res->sr_slot->table; + tbl = slot->table; session = tbl->session; spin_lock(&tbl->slot_tbl_lock); @@ -400,11 +572,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) if (tbl->highest_used_slotid > tbl->target_highest_slotid) send_new_highest_used_slotid = true; - if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) { + if (nfs41_wake_and_assign_slot(tbl, slot)) { send_new_highest_used_slotid = false; goto out_unlock; } - nfs4_free_slot(tbl, res->sr_slot); + nfs4_free_slot(tbl, slot); if (tbl->highest_used_slotid != NFS4_NO_SLOT) send_new_highest_used_slotid = false; @@ -415,19 +587,20 @@ out_unlock: nfs41_server_notify_highest_slotid_update(session->clp); } -static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; - struct nfs4_slot *slot; + struct nfs4_slot *slot = res->sr_slot; struct nfs_client *clp; bool interrupted = false; int ret = 1; + if (slot == NULL) + goto out_noaction; /* don't increment the sequence number if the task wasn't sent */ if (!RPC_WAS_SENT(task)) goto out; - slot = res->sr_slot; session = slot->table->session; if (slot->interrupted) { @@ -435,6 +608,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * interrupted = true; } + trace_nfs4_sequence_done(session, res); /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: @@ -501,6 +675,7 @@ out: /* The session may be reset by one of the error handlers. */ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); nfs41_sequence_free_slot(res); +out_noaction: return ret; retry_nowait: if (rpc_restart_call_prepare(task)) { @@ -514,31 +689,18 @@ out_retry: rpc_delay(task, NFS4_POLL_RETRY_MAX); return 0; } +EXPORT_SYMBOL_GPL(nfs41_sequence_done); static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { if (res->sr_slot == NULL) return 1; + if (!res->sr_slot->table->session) + return nfs40_sequence_done(task, res); return nfs41_sequence_done(task, res); } -static void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ - args->sa_slot = NULL; - args->sa_cache_this = 0; - args->sa_privileged = 0; - if (cache_reply) - args->sa_cache_this = 1; - res->sr_slot = NULL; -} - -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ - args->sa_privileged = 1; -} - int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -557,7 +719,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, task->tk_timeout = 0; spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && + if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ dprintk("%s session is draining\n", __func__); @@ -576,7 +738,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, args->sa_slot = slot; - dprintk("<-- %s slotid=%d seqid=%d\n", __func__, + dprintk("<-- %s slotid=%u seqid=%u\n", __func__, slot->slot_nr, slot->seq_nr); res->sr_slot = slot; @@ -587,6 +749,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, * set to 1 if an rpc level failure occurs. */ res->sr_status = 1; + trace_nfs4_setup_sequence(session, args); out_success: rpc_call_start(task); return 0; @@ -602,38 +765,30 @@ out_sleep: } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); -int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +static int nfs4_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) { - rpc_call_start(task); - goto out; - } + if (!session) + return nfs40_setup_sequence(server, args, res, task); - dprintk("--> %s clp %p session %p sr_slot %d\n", + dprintk("--> %s clp %p session %p sr_slot %u\n", __func__, session->clp, session, res->sr_slot ? - res->sr_slot->slot_nr : -1); + res->sr_slot->slot_nr : NFS4_NO_SLOT); ret = nfs41_setup_sequence(session, args, res, task); -out: + dprintk("<-- %s status=%d\n", __func__, ret); return ret; } -struct nfs41_call_sync_data { - const struct nfs_server *seq_server; - struct nfs4_sequence_args *seq_args; - struct nfs4_sequence_res *seq_res; -}; - static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { - struct nfs41_call_sync_data *data = calldata; + struct nfs4_call_sync_data *data = calldata; struct nfs4_session *session = nfs4_get_session(data->seq_server); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); @@ -643,7 +798,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) { - struct nfs41_call_sync_data *data = calldata; + struct nfs4_call_sync_data *data = calldata; nfs41_sequence_done(task, data->seq_res); } @@ -653,6 +808,42 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; +#else /* !CONFIG_NFS_V4_1 */ + +static int nfs4_setup_sequence(const struct nfs_server *server, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + return nfs40_setup_sequence(server, args, res, task); +} + +static int nfs4_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + return nfs40_sequence_done(task, res); +} + +#endif /* !CONFIG_NFS_V4_1 */ + +static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_call_sync_data *data = calldata; + nfs4_setup_sequence(data->seq_server, + data->seq_args, data->seq_res, task); +} + +static void nfs40_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_call_sync_data *data = calldata; + nfs4_sequence_done(task, data->seq_res); +} + +static const struct rpc_call_ops nfs40_call_sync_ops = { + .rpc_call_prepare = nfs40_call_sync_prepare, + .rpc_call_done = nfs40_call_sync_done, +}; + static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -661,7 +852,8 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, { int ret; struct rpc_task *task; - struct nfs41_call_sync_data data = { + struct nfs_client *clp = server->nfs_client; + struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = args, .seq_res = res, @@ -669,7 +861,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_task_setup task_setup = { .rpc_client = clnt, .rpc_message = msg, - .callback_ops = &nfs41_call_sync_ops, + .callback_ops = clp->cl_mvops->call_sync_ops, .callback_data = &data }; @@ -683,35 +875,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, return ret; } -#else -static -void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) -{ -} - -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) -{ -} - - -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) -{ - return 1; -} -#endif /* CONFIG_NFS_V4_1 */ - -static -int _nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) -{ - return rpc_call_sync(clnt, msg, 0); -} - static int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, @@ -720,9 +883,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - nfs41_init_sequence(args, res, cache_reply); - return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, - args, res); + nfs4_init_sequence(args, res, cache_reply); + return nfs4_call_sync_sequence(clnt, server, msg, args, res); } static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) @@ -747,6 +909,7 @@ struct nfs4_opendata { struct nfs4_string owner_name; struct nfs4_string group_name; struct nfs_fattr f_attr; + struct nfs4_label *f_label; struct dentry *dir; struct dentry *dentry; struct nfs4_state_owner *owner; @@ -754,14 +917,46 @@ struct nfs4_opendata { struct iattr attrs; unsigned long timestamp; unsigned int rpc_done : 1; + unsigned int file_created : 1; + unsigned int is_recover : 1; int rpc_status; int cancelled; }; +static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + if (err != -EINVAL) + return false; + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + return false; + server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1; + exception->retry = 1; + return true; +} + +static enum open_claim_type4 +nfs4_map_atomic_open_claim(struct nfs_server *server, + enum open_claim_type4 claim) +{ + if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) + return claim; + switch (claim) { + default: + return claim; + case NFS4_OPEN_CLAIM_FH: + return NFS4_OPEN_CLAIM_NULL; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + return NFS4_OPEN_CLAIM_DELEGATE_CUR; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + return NFS4_OPEN_CLAIM_DELEGATE_PREV; + } +} static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; + p->o_res.f_label = p->f_label; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; @@ -773,6 +968,8 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, const struct iattr *attrs, + struct nfs4_label *label, + enum open_claim_type4 claim, gfp_t gfp_mask) { struct dentry *parent = dget_parent(dentry); @@ -783,15 +980,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) goto err; + + p->f_label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->f_label)) + goto err_free_p; + p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); if (p->o_arg.seqid == NULL) - goto err_free; + goto err_free_label; nfs_sb_active(dentry->d_sb); p->dentry = dget(dentry); p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); - p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS @@ -807,11 +1008,24 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.id.uniquifier = sp->so_seqid.owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; - p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; + p->o_arg.label = label; + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + switch (p->o_arg.claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + p->o_arg.fh = NFS_FH(dir); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + p->o_arg.fh = NFS_FH(dentry->d_inode); + } if (attrs != NULL && attrs->ia_valid != 0) { - __be32 verf[2]; + __u32 verf[2]; p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); @@ -827,7 +1041,10 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, nfs4_init_opendata_res(p); kref_init(&p->kref); return p; -err_free: + +err_free_label: + nfs4_label_free(p->f_label); +err_free_p: kfree(p); err: dput(parent); @@ -844,10 +1061,14 @@ static void nfs4_opendata_free(struct kref *kref) if (p->state != NULL) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); + + nfs4_label_free(p->f_label); + dput(p->dir); dput(p->dentry); nfs_sb_deactive(sb); nfs_fattr_free_names(&p->f_attr); + kfree(p->f_attr.mdsthreshold); kfree(p); } @@ -896,6 +1117,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) return 0; if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) return 0; + if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + return 0; nfs_mark_delegation_referenced(delegation); return 1; } @@ -915,11 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) nfs4_state_set_mode_locked(state, state->state | fmode); } -static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { + struct nfs_client *clp = state->owner->so_server->nfs_client; + bool need_recover = false; + + if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly) + need_recover = true; + if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly) + need_recover = true; + if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr) + need_recover = true; + if (need_recover) + nfs4_state_mark_reclaim_nograce(clp, state); +} + +static bool nfs_need_update_open_stateid(struct nfs4_state *state, + nfs4_stateid *stateid) +{ + if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) + return true; + if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs_test_and_clear_all_open_stateid(state); + return true; + } + if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) + return true; + return false; +} + +static void nfs_clear_open_stateid_locked(struct nfs4_state *state, + nfs4_stateid *stateid, fmode_t fmode) +{ + clear_bit(NFS_O_RDWR_STATE, &state->flags); + switch (fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_WRITE: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_READ: + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case 0: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); + } + if (stateid == NULL) + return; + if (!nfs_need_update_open_stateid(state, stateid)) + return; if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); +} + +static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +{ + write_seqlock(&state->seqlock); + nfs_clear_open_stateid_locked(state, stateid, fmode); + write_sequnlock(&state->seqlock); + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) + nfs4_schedule_state_manager(state->owner->so_server->nfs_client); +} + +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +{ switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); @@ -930,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * case FMODE_READ|FMODE_WRITE: set_bit(NFS_O_RDWR_STATE, &state->flags); } -} - -static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) -{ - write_seqlock(&state->seqlock); - nfs_set_open_stateid_locked(state, stateid, fmode); - write_sequnlock(&state->seqlock); + if (!nfs_need_update_open_stateid(state, stateid)) + return; + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + nfs4_stateid_copy(&state->stateid, stateid); + nfs4_stateid_copy(&state->open_stateid, stateid); } static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) @@ -972,7 +1253,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat goto no_delegation; spin_lock(&deleg_cur->lock); - if (nfsi->delegation != deleg_cur || + if (rcu_dereference(nfsi->delegation) != deleg_cur || + test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) || (deleg_cur->type & fmode) != fmode) goto no_delegation_unlock; @@ -993,6 +1275,8 @@ no_delegation: __update_open_stateid(state, open_stateid, NULL, fmode); ret = 1; } + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) + nfs4_schedule_state_manager(state->owner->so_server->nfs_client); return ret; } @@ -1017,7 +1301,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); + int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; @@ -1041,9 +1325,12 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) /* Save the delegation */ nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); - ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); - if (ret != 0) - goto out; + nfs_release_seqid(opendata->o_arg.seqid); + if (!opendata->is_recover) { + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); + if (ret != 0) + goto out; + } ret = -EAGAIN; /* Try to update the stateid using the delegation */ @@ -1096,29 +1383,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - ret = data->rpc_status; - goto err; + if (data->rpc_status) { + ret = data->rpc_status; + goto err; + } + /* cached opens have already been processed */ + goto update; } - ret = -ESTALE; - if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || - !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || - !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) - goto err; - - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err; - ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) goto err; if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); +update: update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); + atomic_inc(&state->count); return state; err: @@ -1141,7 +1423,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) ret = -EAGAIN; if (!(data->f_attr.valid & NFS_ATTR_FATTR)) goto err; - inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); + inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label); ret = PTR_ERR(inode); if (IS_ERR(inode)) goto err; @@ -1155,6 +1437,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) data->o_arg.fmode); iput(inode); out: + nfs_release_seqid(data->o_arg.seqid); return state; err_put_inode: iput(inode); @@ -1187,11 +1470,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state * return ERR_PTR(-ENOENT); } -static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state) +static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, + struct nfs4_state *state, enum open_claim_type4 claim) { struct nfs4_opendata *opendata; - opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS); + opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, + NULL, NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -1225,11 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * struct nfs4_state *newstate; int ret; + /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ + clear_bit(NFS_O_RDWR_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); /* memory barrier prior to reading state->n_* */ clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); if (state->n_rdwr != 0) { - clear_bit(NFS_O_RDWR_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); if (ret != 0) return ret; @@ -1237,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * return -ESTALE; } if (state->n_wronly != 0) { - clear_bit(NFS_O_WRONLY_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); if (ret != 0) return ret; @@ -1245,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * return -ESTALE; } if (state->n_rdonly != 0) { - clear_bit(NFS_O_RDONLY_STATE, &state->flags); ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); if (ret != 0) return ret; @@ -1277,11 +1564,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state fmode_t delegation_type = 0; int status; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_PREVIOUS); if (IS_ERR(opendata)) return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; - opendata->o_arg.fh = NFS_FH(state->inode); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) @@ -1300,6 +1586,9 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + trace_nfs4_open_reclaim(ctx, 0, err); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -1314,77 +1603,94 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err) +{ + switch (err) { + default: + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); + case 0: + case -ENOENT: + case -ESTALE: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); + return -EAGAIN; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + set_bit(NFS_DELEGATED_STATE, &state->flags); + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_lease_recovery(server->nfs_client); + return -EAGAIN; + case -NFS4ERR_MOVED: + nfs4_schedule_migration_recovery(server); + return -EAGAIN; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(server->nfs_client); + return -EAGAIN; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs_inode_find_state_and_recover(state->inode, + stateid); + nfs4_schedule_stateid_recovery(server, state); + return 0; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + return -EAGAIN; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + return 0; + } + return err; +} + +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { + struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; - int ret; + int err; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); - opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - ret = nfs4_open_recover(opendata, state); + err = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); - return ret; + return nfs4_handle_delegation_recall_error(server, state, stateid, err); } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { - struct nfs4_exception exception = { }; - struct nfs_server *server = NFS_SERVER(state->inode); - int err; - do { - err = _nfs4_open_delegation_recall(ctx, state, stateid); - switch (err) { - case 0: - case -ENOENT: - case -ESTALE: - goto out; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - /* Don't recall a delegation if it was lost */ - nfs4_schedule_lease_recovery(server->nfs_client); - goto out; - case -ERESTARTSYS: - /* - * The show must go on: exit, but mark the - * stateid as needing recovery. - */ - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - nfs_inode_find_state_and_recover(state->inode, - stateid); - nfs4_schedule_stateid_recovery(server, state); - case -ENOMEM: - err = 0; - goto out; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); -out: - return err; + struct nfs4_opendata *data = calldata; + + nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, + &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; + nfs40_sequence_done(task, &data->c_res.seq_res); + data->rpc_status = task->tk_status; if (data->rpc_status == 0) { nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); @@ -1413,6 +1719,7 @@ out_free: } static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_prepare = nfs4_open_confirm_prepare, .rpc_call_done = nfs4_open_confirm_done, .rpc_release = nfs4_open_confirm_release, }; @@ -1440,6 +1747,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) }; int status; + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; @@ -1461,9 +1769,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; + struct nfs_client *clp = sp->so_server->nfs_client; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) - return; + goto out_wait; /* * Check if we still need to send an OPEN call, or if we can use * a delegation instead. @@ -1476,15 +1785,20 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR && + data->o_arg.claim != NFS4_OPEN_CLAIM_DELEG_CUR_FH && can_open_delegated(delegation, data->o_arg.fmode)) goto unlock_no_action; rcu_read_unlock(); } /* Update client id. */ - data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; - if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { - task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; + data->o_arg.clientid = clp->cl_clientid; + switch (data->o_arg.claim) { + case NFS4_OPEN_CLAIM_PREVIOUS: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; + case NFS4_OPEN_CLAIM_FH: + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; @@ -1493,11 +1807,22 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) &data->o_res.seq_res, task) != 0) nfs_release_seqid(data->o_arg.seqid); + + /* Set the create mode (note dependency on the session type) */ + data->o_arg.createmode = NFS4_CREATE_UNCHECKED; + if (data->o_arg.open_flags & O_EXCL) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_GUARDED; + else if (clp->cl_mvops->minor_version > 0) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + } return; unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; +out_wait: nfs4_sequence_done(task, &data->o_res.seq_res); } @@ -1582,13 +1907,16 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; - nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; data->cancelled = 0; - if (isrecover) + data->is_recover = 0; + if (isrecover) { nfs4_set_sequence_privileged(&o_arg->seq_args); + data->is_recover = 1; + } task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1684,8 +2012,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) + if (o_arg->open_flags & O_CREAT) { update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_EXCL) + data->file_created = 1; + else if (o_res->cinfo.before != o_res->cinfo.after) + data->file_created = 1; + } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -1694,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return status; } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); return 0; } @@ -1713,7 +2046,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state); + opendata = nfs4_open_recoverdata_alloc(ctx, state, + NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); @@ -1731,6 +2065,9 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + trace_nfs4_open_expired(ctx, 0, err); + if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) + continue; switch (err) { default: goto out; @@ -1751,7 +2088,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return -EAGAIN; ret = nfs4_do_open_expired(ctx, state); put_nfs_open_context(ctx); return ret; @@ -1762,18 +2099,31 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid *stateid = &state->stateid; - int status; + struct nfs_delegation *delegation; + struct rpc_cred *cred = NULL; + int status = -NFS4ERR_BAD_STATEID; /* If a state reset has been done, test_stateid is unneeded */ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) return; - status = nfs41_test_stateid(server, stateid); + /* Get the delegation credential for use by test/free_stateid */ + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(state->inode)->delegation); + if (delegation != NULL && + nfs4_stateid_match(&delegation->stateid, stateid)) { + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_stateid(server, stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); + } else + rcu_read_unlock(); + if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid); + nfs41_free_stateid(server, stateid, cred); nfs_remove_bad_delegation(state->inode); write_seqlock(&state->seqlock); @@ -1781,6 +2131,9 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state) write_sequnlock(&state->seqlock); clear_bit(NFS_DELEGATED_STATE, &state->flags); } + + if (cred != NULL) + put_rpccred(cred); } /** @@ -1795,6 +2148,7 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid *stateid = &state->open_stateid; + struct rpc_cred *cred = state->owner->so_cred; int status; /* If a state reset has been done, test_stateid is unneeded */ @@ -1803,16 +2157,18 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) return -NFS4ERR_BAD_STATEID; - status = nfs41_test_stateid(server, stateid); + status = nfs41_test_stateid(server, stateid, cred); + trace_nfs4_test_open_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid); + nfs41_free_stateid(server, stateid, cred); clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); } return status; } @@ -1845,22 +2201,80 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct sattr->ia_valid |= ATTR_MTIME; } +static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, + fmode_t fmode, + int flags, + struct nfs_open_context *ctx) +{ + struct nfs4_state_owner *sp = opendata->owner; + struct nfs_server *server = sp->so_server; + struct dentry *dentry; + struct nfs4_state *state; + unsigned int seq; + int ret; + + seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); + + ret = _nfs4_proc_open(opendata); + if (ret != 0) + goto out; + + state = nfs4_opendata_to_nfs4_state(opendata); + ret = PTR_ERR(state); + if (IS_ERR(state)) + goto out; + if (server->caps & NFS_CAP_POSIX_LOCK) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + + dentry = opendata->dentry; + if (dentry->d_inode == NULL) { + /* FIXME: Is this d_drop() ever needed? */ + d_drop(dentry); + dentry = d_add_unique(dentry, igrab(state->inode)); + if (dentry == NULL) { + dentry = opendata->dentry; + } else if (dentry != ctx->dentry) { + dput(ctx->dentry); + ctx->dentry = dget(dentry); + } + nfs_set_verifier(dentry, + nfs_save_change_attribute(opendata->dir->d_inode)); + } + + ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); + if (ret != 0) + goto out; + + ctx->state = state; + if (dentry->d_inode == state->inode) { + nfs_inode_attach_open_context(ctx); + if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) + nfs4_schedule_stateid_recovery(server, state); + } +out: + return ret; +} + /* * Returns a referenced nfs4_state */ static int _nfs4_do_open(struct inode *dir, - struct dentry *dentry, - fmode_t fmode, + struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct rpc_cred *cred, - struct nfs4_state **res, - struct nfs4_threshold **ctx_th) + struct nfs4_label *label, + int *opened) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; + struct dentry *dentry = ctx->dentry; + struct rpc_cred *cred = ctx->cred; + struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; + fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); + enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; + struct nfs4_label *olabel = NULL; int status; /* Protect against reboot recovery conflicts */ @@ -1876,83 +2290,91 @@ static int _nfs4_do_open(struct inode *dir, if (dentry->d_inode != NULL) nfs4_return_incompatible_delegation(dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); + if (dentry->d_inode) + claim = NFS4_OPEN_CLAIM_FH; + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, + label, claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; - if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { - opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); - if (!opendata->f_attr.mdsthreshold) + if (label) { + olabel = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(olabel)) { + status = PTR_ERR(olabel); goto err_opendata_put; + } + } + + if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { + if (!opendata->f_attr.mdsthreshold) { + opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); + if (!opendata->f_attr.mdsthreshold) + goto err_free_label; + } opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); - status = _nfs4_proc_open(opendata); - if (status != 0) - goto err_opendata_put; - - state = nfs4_opendata_to_nfs4_state(opendata); - status = PTR_ERR(state); - if (IS_ERR(state)) - goto err_opendata_put; - if (server->caps & NFS_CAP_POSIX_LOCK) - set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); - - status = nfs4_opendata_access(cred, opendata, state, fmode, flags); + status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); if (status != 0) - goto err_opendata_put; + goto err_free_label; + state = ctx->state; - if (opendata->o_arg.open_flags & O_EXCL) { + if ((opendata->o_arg.open_flags & O_EXCL) && + (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { nfs4_exclusive_attrset(opendata, sattr); nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - state); - if (status == 0) + state, label, olabel); + if (status == 0) { nfs_setattr_update_inode(state->inode, sattr); - nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); + nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); + nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); + } } + if (opendata->file_created) + *opened |= FILE_CREATED; - if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) + if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { *ctx_th = opendata->f_attr.mdsthreshold; - else - kfree(opendata->f_attr.mdsthreshold); - opendata->f_attr.mdsthreshold = NULL; + opendata->f_attr.mdsthreshold = NULL; + } + + nfs4_label_free(olabel); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); - *res = state; return 0; +err_free_label: + nfs4_label_free(olabel); err_opendata_put: - kfree(opendata->f_attr.mdsthreshold); nfs4_opendata_put(opendata); err_put_state_owner: nfs4_put_state_owner(sp); out_err: - *res = NULL; return status; } static struct nfs4_state *nfs4_do_open(struct inode *dir, - struct dentry *dentry, - fmode_t fmode, + struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct rpc_cred *cred, - struct nfs4_threshold **ctx_th) + struct nfs4_label *label, + int *opened) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; int status; - fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; do { - status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, - &res, ctx_th); + status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); + res = ctx->state; + trace_nfs4_open_file(ctx, flags, status); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1988,7 +2410,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } - res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + if (nfs4_clear_cap_atomic_open_v1(server, status, &exception)) + continue; + res = ERR_PTR(nfs4_handle_exception(server, status, &exception)); } while (exception.retry); return res; @@ -1996,7 +2420,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state) + struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_setattrargs arg = { @@ -2004,9 +2429,11 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, .iap = sattr, .server = server, .bitmask = server->attr_bitmask, + .label = ilabel, }; struct nfs_setattrres res = { .fattr = fattr, + .label = olabel, .server = server, }; struct rpc_message msg = { @@ -2016,20 +2443,32 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, .rpc_cred = cred, }; unsigned long timestamp = jiffies; + fmode_t fmode; + bool truncate; int status; + arg.bitmask = nfs4_bitmask(server, ilabel); + if (ilabel) + arg.bitmask = nfs4_bitmask(server, olabel); + nfs_fattr_init(fattr); - if (state != NULL) { + /* Servers should only apply open mode checks for file size changes */ + truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; + fmode = truncate ? FMODE_WRITE : FMODE_READ; + + if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { + /* Use that stateid */ + } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); - } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, - FMODE_WRITE)) { - /* Use that stateid */ + if (!nfs4_valid_open_stateid(state)) + return -EBADF; + if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + &lockowner) == -EIO) + return -EBADF; } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); @@ -2041,7 +2480,8 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state) + struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { @@ -2050,9 +2490,17 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, }; int err; do { - err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); + err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); + trace_nfs4_setattr(inode, err); switch (err) { case -NFS4ERR_OPENMODE: + if (!(sattr->ia_valid & ATTR_SIZE)) { + pr_warn_once("NFSv4: server %s is incorrectly " + "applying open mode checks to " + "a SETATTR that is not " + "changing file size.\n", + server->nfs_client->cl_hostname); + } if (state && !(state->state & FMODE_WRITE)) { err = -EBADF; if (sattr->ia_valid & ATTR_OPEN) @@ -2088,22 +2536,10 @@ static void nfs4_free_closedata(void *data) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - nfs_sb_deactive_async(sb); + nfs_sb_deactive(sb); kfree(calldata); } -static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, - fmode_t fmode) -{ - spin_lock(&state->owner->so_lock); - if (!(fmode & FMODE_READ)) - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - if (!(fmode & FMODE_WRITE)) - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_O_RDWR_STATE, &state->flags); - spin_unlock(&state->owner->so_lock); -} - static void nfs4_close_done(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; @@ -2113,6 +2549,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; + trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ @@ -2121,11 +2558,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->roc) pnfs_roc_set_barrier(state->inode, calldata->roc_barrier); - nfs_set_open_stateid(state, &calldata->res.stateid, 0); + nfs_clear_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - nfs4_close_clear_stateid_flags(state, - calldata->arg.fmode); - break; + goto out_release; + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: @@ -2133,9 +2569,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) break; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { rpc_restart_call_prepare(task); + goto out_release; + } } + nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); +out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); @@ -2150,7 +2590,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) - return; + goto out_wait; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.fmode = FMODE_READ|FMODE_WRITE; @@ -2168,20 +2608,22 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode &= ~FMODE_WRITE; } } + if (!nfs4_valid_open_stateid(state)) + call_close = 0; spin_unlock(&state->owner->so_lock); if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ - task->tk_action = NULL; - nfs4_sequence_done(task, &calldata->res.seq_res); - goto out; + goto out_no_action; } if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && - pnfs_roc_drain(inode, &calldata->roc_barrier, task)) - goto out; + pnfs_roc_drain(inode, &calldata->roc_barrier, task)) { + nfs_release_seqid(calldata->arg.seqid); + goto out_wait; + } } nfs_fattr_init(calldata->res.fattr); @@ -2191,8 +2633,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); -out: dprintk("%s: done!\n", __func__); + return; +out_no_action: + task->tk_action = NULL; +out_wait: + nfs4_sequence_done(task, &calldata->res.seq_res); } static const struct rpc_call_ops nfs4_close_ops = { @@ -2231,10 +2677,13 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) }; int status = -ENOMEM; + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, + &task_setup_data.rpc_client, &msg); + calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -2271,17 +2720,22 @@ out: } static struct inode * -nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + int open_flags, struct iattr *attr, int *opened) { struct nfs4_state *state; + struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; + + label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, - ctx->cred, &ctx->mdsthreshold); + state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened); + + nfs4_label_release_security(label); + if (IS_ERR(state)) return ERR_CAST(state); - ctx->state = state; - return igrab(state->inode); + return state->inode; } static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) @@ -2294,6 +2748,10 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) nfs4_close_state(ctx->state, ctx->mode); } +#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) +#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL) + static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_server_caps_arg args = { @@ -2309,13 +2767,27 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + /* Sanity check the server answers */ + switch (server->nfs_client->cl_minorversion) { + case 0: + res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK; + res.attr_bitmask[2] = 0; + break; + case 1: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + break; + case 2: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME); - if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) + NFS_CAP_CTIME|NFS_CAP_MTIME| + NFS_CAP_SECURITY_LABEL); + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; @@ -2337,10 +2809,18 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_CTIME; if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) server->caps |= NFS_CAP_MTIME; +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) + server->caps |= NFS_CAP_SECURITY_LABEL; +#endif + memcpy(server->attr_bitmask_nl, res.attr_bitmask, + sizeof(server->attr_bitmask)); + server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + server->cache_consistency_bitmask[2] = 0; server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -2363,8 +2843,9 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + u32 bitmask[3]; struct nfs4_lookup_root_arg args = { - .bitmask = nfs4_fattr_bitmap, + .bitmask = bitmask, }; struct nfs4_lookup_res res = { .server = server, @@ -2377,6 +2858,13 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; + bitmask[0] = nfs4_fattr_bitmap[0]; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* + * Process the label in the upcoming getfattr + */ + bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; + nfs_fattr_init(info->fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } @@ -2388,6 +2876,7 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, int err; do { err = _nfs4_lookup_root(server, fhandle, info); + trace_nfs4_lookup_root(server, fhandle, info->fattr, err); switch (err) { case 0: case -NFS4ERR_WRONGSEC: @@ -2403,12 +2892,15 @@ out: static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info, rpc_authflavor_t flavor) { + struct rpc_auth_create_args auth_args = { + .pseudoflavor = flavor, + }; struct rpc_auth *auth; int ret; - auth = rpcauth_create(flavor, server->client); + auth = rpcauth_create(&auth_args, server->client); if (IS_ERR(auth)) { - ret = -EIO; + ret = -EACCES; goto out; } ret = nfs4_lookup_root(server, fhandle, info); @@ -2416,27 +2908,49 @@ out: return ret; } +/* + * Retry pseudoroot lookup with various security flavors. We do this when: + * + * NFSv4.0: the PUTROOTFH operation returns NFS4ERR_WRONGSEC + * NFSv4.1: the server does not support the SECINFO_NO_NAME operation + * + * Returns zero on success, or a negative NFS4ERR value, or a + * negative errno value. + */ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { - int i, len, status = 0; - rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS]; - - len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array)); - if (len < 0) - return len; - - for (i = 0; i < len; i++) { - /* AUTH_UNIX is the default flavor if none was specified, - * thus has already been tried. */ - if (flav_array[i] == RPC_AUTH_UNIX) - continue; - - status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); - if (status == -NFS4ERR_WRONGSEC || status == -EACCES) - continue; - break; + /* Per 3530bis 15.33.5 */ + static const rpc_authflavor_t flav_array[] = { + RPC_AUTH_GSS_KRB5P, + RPC_AUTH_GSS_KRB5I, + RPC_AUTH_GSS_KRB5, + RPC_AUTH_UNIX, /* courtesy */ + RPC_AUTH_NULL, + }; + int status = -EPERM; + size_t i; + + if (server->auth_info.flavor_len > 0) { + /* try each flavor specified by user */ + for (i = 0; i < server->auth_info.flavor_len; i++) { + status = nfs4_lookup_root_sec(server, fhandle, info, + server->auth_info.flavors[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } + } else { + /* no flavors specified by user, try default list */ + for (i = 0; i < ARRAY_SIZE(flav_array); i++) { + status = nfs4_lookup_root_sec(server, fhandle, info, + flav_array[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } } + /* * -EACCESS could mean that the user doesn't have correct permissions * to access the mount. It could also mean that we tried to mount @@ -2449,24 +2963,42 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -/* - * get the file handle for the "/" directory on the server +static int nfs4_do_find_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fsinfo *info) +{ + int mv = server->nfs_client->cl_minorversion; + return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info); +} + +/** + * nfs4_proc_get_rootfh - get file handle for server's pseudoroot + * @server: initialized nfs_server handle + * @fhandle: we fill in the pseudo-fs root file handle + * @info: we fill in an FSINFO struct + * @auth_probe: probe the auth flavours + * + * Returns zero on success, or a negative errno. */ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fsinfo *info, + bool auth_probe) { - int minor_version = server->nfs_client->cl_minorversion; - int status = nfs4_lookup_root(server, fhandle, info); - if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) - /* - * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM - * by nfs4_map_errors() as this function exits. - */ - status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); + int status; + + switch (auth_probe) { + case false: + status = nfs4_lookup_root(server, fhandle, info); + if (status != -NFS4ERR_WRONGSEC) + break; + default: + status = nfs4_do_find_root_sec(server, fhandle, info); + } + if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) status = nfs4_do_fsinfo(server, fhandle, info); + return nfs4_map_errors(status); } @@ -2475,6 +3007,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, { int error; struct nfs_fattr *fattr = info->fattr; + struct nfs4_label *label = NULL; error = nfs4_server_capabilities(server, mntfh); if (error < 0) { @@ -2482,16 +3015,23 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, return error; } - error = nfs4_proc_getattr(server, mntfh, fattr); + label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(label)) + return PTR_ERR(label); + + error = nfs4_proc_getattr(server, mntfh, fattr, label); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); - return error; + goto err_free_label; } if (fattr->valid & NFS_ATTR_FATTR_FSID && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); +err_free_label: + nfs4_label_free(label); + return error; } @@ -2518,11 +3058,16 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; - /* Make sure server returned a different fsid for the referral */ + + /* + * If the fsid didn't change, this is a migration event, not a + * referral. Cause us to drop into the exception handler, which + * will kick off migration recovery. + */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { dprintk("%s: server did not return a different fsid for" " a referral at %s\n", __func__, name->name); - status = -EIO; + status = -NFS4ERR_MOVED; goto out; } /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ @@ -2538,7 +3083,8 @@ out: return status; } -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_getattr_arg args = { .fh = fhandle, @@ -2546,6 +3092,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; struct nfs4_getattr_res res = { .fattr = fattr, + .label = label, .server = server, }; struct rpc_message msg = { @@ -2553,18 +3100,22 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - + + args.bitmask = nfs4_bitmask(server, label); + nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } -static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_getattr(server, fhandle, fattr), + err = _nfs4_proc_getattr(server, fhandle, fattr, label); + trace_nfs4_getattr(server, fhandle, fattr, err); + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; @@ -2594,19 +3145,20 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct inode *inode = dentry->d_inode; struct rpc_cred *cred = NULL; struct nfs4_state *state = NULL; + struct nfs4_label *label = NULL; int status; if (pnfs_ld_layoutret_on_setattr(inode)) - pnfs_return_layout(inode); + pnfs_commit_and_return_layout(inode); nfs_fattr_init(fattr); /* Deal with open(O_TRUNC) */ if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME); /* Optimization: if the end result is no change, don't RPC */ - if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) + if ((sattr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; /* Search for an existing open(O_WRITE) file */ @@ -2620,15 +3172,22 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } - status = nfs4_do_setattr(inode, cred, fattr, sattr, state); - if (status == 0) + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (IS_ERR(label)) + return PTR_ERR(label); + + status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); + if (status == 0) { nfs_setattr_update_inode(inode, sattr); + nfs_setsecurity(inode, fattr, label); + } + nfs4_label_free(label); return status; } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_server *server = NFS_SERVER(dir); int status; @@ -2640,6 +3199,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_res res = { .server = server, .fattr = fattr, + .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -2648,6 +3208,8 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, .rpc_resp = &res, }; + args.bitmask = nfs4_bitmask(server, label); + nfs_fattr_init(fattr); dprintk("NFS call lookup %s\n", name->name); @@ -2666,13 +3228,14 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_exception exception = { }; struct rpc_clnt *client = *clnt; int err; do { - err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr); + err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: err = -ENOENT; @@ -2684,8 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, err = -EPERM; if (client != *clnt) goto out; - - client = nfs4_create_sec_client(client, dir, name); + client = nfs4_negotiate_security(client, dir, name); if (IS_ERR(client)) return PTR_ERR(client); @@ -2706,12 +3268,13 @@ out: } static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + struct nfs4_label *label) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); + status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -2723,15 +3286,13 @@ struct rpc_clnt * nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_clnt *client = NFS_CLIENT(dir); int status; - struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); - if (status < 0) { - rpc_shutdown_client(client); + status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); + if (status < 0) return ERR_PTR(status); - } - return client; + return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; } static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) @@ -2751,7 +3312,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_cred = entry->cred, }; int mode = entry->mask; - int status; + int status = 0; /* * Determine which access bits we want to ask for... @@ -2788,8 +3349,9 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_access(inode, entry), + err = _nfs4_proc_access(inode, entry); + trace_nfs4_access(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -2842,8 +3404,9 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_readlink(inode, page, pgbase, pglen), + err = _nfs4_proc_readlink(inode, page, pgbase, pglen); + trace_nfs4_readlink(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -2856,27 +3419,26 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { + struct nfs4_label l, *ilabel = NULL; struct nfs_open_context *ctx; struct nfs4_state *state; + int opened = 0; int status = 0; ctx = alloc_nfs_open_context(dentry, FMODE_READ); if (IS_ERR(ctx)) return PTR_ERR(ctx); + ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); + sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, dentry, ctx->mode, - flags, sattr, ctx->cred, - &ctx->mdsthreshold); - d_drop(dentry); + state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; } - d_add(dentry, igrab(state->inode)); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - ctx->state = state; out: + nfs4_label_release_security(ilabel); put_nfs_open_context(ctx); return status; } @@ -2909,8 +3471,9 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name) struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_remove(dir, name), + err = _nfs4_proc_remove(dir, name); + trace_nfs4_remove(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; @@ -2924,7 +3487,9 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1); + + nfs_fattr_init(res->dir_attr); } static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) @@ -2937,7 +3502,8 @@ static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlin static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { - struct nfs_removeres *res = task->tk_msg.rpc_resp; + struct nfs_unlinkdata *data = task->tk_calldata; + struct nfs_removeres *res = &data->res; if (!nfs4_sequence_done(task, &res->seq_res)) return 0; @@ -2955,7 +3521,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; res->server = server; - nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1); } static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) @@ -2969,7 +3535,8 @@ static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renam static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { - struct nfs_renameres *res = task->tk_msg.rpc_resp; + struct nfs_renamedata *data = task->tk_calldata; + struct nfs_renameres *res = &data->res; if (!nfs4_sequence_done(task, &res->seq_res)) return 0; @@ -2981,48 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 1; } -static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_server *server = NFS_SERVER(old_dir); - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .new_dir = NFS_FH(new_dir), - .old_name = old_name, - .new_name = new_name, - }; - struct nfs_renameres res = { - .server = server, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status = -ENOMEM; - - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); - if (!status) { - update_changeattr(old_dir, &res.old_cinfo); - update_changeattr(new_dir, &res.new_cinfo); - } - return status; -} - -static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs4_exception exception = { }; - int err; - do { - err = nfs4_handle_exception(NFS_SERVER(old_dir), - _nfs4_proc_rename(old_dir, old_name, - new_dir, new_name), - &exception); - } while (exception.retry); - return err; -} - static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { struct nfs_server *server = NFS_SERVER(inode); @@ -3034,6 +3559,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * }; struct nfs4_link_res res = { .server = server, + .label = NULL, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], @@ -3046,11 +3572,24 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * if (res.fattr == NULL) goto out; + res.label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(res.label)) { + status = PTR_ERR(res.label); + goto out; + } + arg.bitmask = nfs4_bitmask(server, res.label); + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(dir, &res.cinfo); - nfs_post_op_update_inode(inode, res.fattr); + status = nfs_post_op_update_inode(inode, res.fattr); + if (!status) + nfs_setsecurity(inode, res.fattr, res.label); } + + + nfs4_label_free(res.label); + out: nfs_free_fattr(res.fattr); return status; @@ -3074,6 +3613,7 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; + struct nfs4_label *label; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -3085,6 +3625,10 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, if (data != NULL) { struct nfs_server *server = NFS_SERVER(dir); + data->label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(data->label)) + goto out_free; + data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; data->msg.rpc_argp = &data->arg; data->msg.rpc_resp = &data->res; @@ -3093,13 +3637,17 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.name = name; data->arg.attrs = sattr; data->arg.ftype = ftype; - data->arg.bitmask = server->attr_bitmask; + data->arg.bitmask = nfs4_bitmask(server, data->label); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; + data->res.label = data->label; nfs_fattr_init(data->res.fattr); } return data; +out_free: + kfree(data); + return NULL; } static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) @@ -3108,18 +3656,20 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { update_changeattr(dir, &data->res.dir_cinfo); - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); } return status; } static void nfs4_free_createdata(struct nfs4_createdata *data) { + nfs4_label_free(data->label); kfree(data); } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr) + struct page *page, unsigned int len, struct iattr *sattr, + struct nfs4_label *label) { struct nfs4_createdata *data; int status = -ENAMETOOLONG; @@ -3135,6 +3685,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK]; data->arg.u.symlink.pages = &page; data->arg.u.symlink.len = len; + data->arg.label = label; status = nfs4_do_create(dir, dentry, data); @@ -3147,18 +3698,24 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, unsigned int len, struct iattr *sattr) { struct nfs4_exception exception = { }; + struct nfs4_label l, *label = NULL; int err; + + label = nfs4_label_init_security(dir, dentry, sattr, &l); + do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, dentry, page, - len, sattr), + err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label); + trace_nfs4_symlink(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); + + nfs4_label_release_security(label); return err; } static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, - struct iattr *sattr) + struct iattr *sattr, struct nfs4_label *label) { struct nfs4_createdata *data; int status = -ENOMEM; @@ -3167,6 +3724,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, if (data == NULL) goto out; + data->arg.label = label; status = nfs4_do_create(dir, dentry, data); nfs4_free_createdata(data); @@ -3178,14 +3736,20 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { struct nfs4_exception exception = { }; + struct nfs4_label l, *label = NULL; int err; + label = nfs4_label_init_security(dir, dentry, sattr, &l); + sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mkdir(dir, dentry, sattr), + err = _nfs4_proc_mkdir(dir, dentry, sattr, label); + trace_nfs4_mkdir(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); + nfs4_label_release_security(label); + return err; } @@ -3210,9 +3774,8 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; - dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name, + dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, + dentry, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; @@ -3234,16 +3797,17 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), - _nfs4_proc_readdir(dentry, cred, cookie, - pages, count, plus), + err = _nfs4_proc_readdir(dentry, cred, cookie, + pages, count, plus); + trace_nfs4_readdir(dentry->d_inode, err); + err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err, &exception); } while (exception.retry); return err; } static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, - struct iattr *sattr, dev_t rdev) + struct iattr *sattr, struct nfs4_label *label, dev_t rdev) { struct nfs4_createdata *data; int mode = sattr->ia_mode; @@ -3268,7 +3832,8 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, status = -EINVAL; goto out_free; } - + + data->arg.label = label; status = nfs4_do_create(dir, dentry, data); out_free: nfs4_free_createdata(data); @@ -3280,14 +3845,21 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { struct nfs4_exception exception = { }; + struct nfs4_label l, *label = NULL; int err; + label = nfs4_label_init_security(dir, dentry, sattr, &l); + sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mknod(dir, dentry, sattr, rdev), + err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev); + trace_nfs4_mknod(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); + + nfs4_label_release_security(label); + return err; } @@ -3345,12 +3917,22 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { struct nfs4_exception exception = { }; + unsigned long now = jiffies; int err; do { - err = nfs4_handle_exception(server, - _nfs4_do_fsinfo(server, fhandle, fsinfo), - &exception); + err = _nfs4_do_fsinfo(server, fhandle, fsinfo); + trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); + if (err == 0) { + struct nfs_client *clp = server->nfs_client; + + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo->lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + break; + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -3410,15 +3992,57 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -void __nfs4_read_done_cb(struct nfs_read_data *data) +int nfs4_set_rw_stateid(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + const struct nfs_lockowner *lockowner = NULL; + + if (l_ctx != NULL) + lockowner = &l_ctx->lockowner; + return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); +} +EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); + +static bool nfs4_stateid_is_current(nfs4_stateid *stateid, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode) +{ + nfs4_stateid current_stateid; + + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) + return true; + return nfs4_stateid_match(stateid, ¤t_stateid); +} + +static bool nfs4_error_stateid_expired(int err) +{ + switch (err) { + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_OPENMODE: + case -NFS4ERR_EXPIRED: + return true; + } + return false; +} + +void __nfs4_read_done_cb(struct nfs_pgio_data *data) { nfs_invalidate_atime(data->header->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_server *server = NFS_SERVER(data->header->inode); + trace_nfs4_read(data, task->tk_status); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -3430,38 +4054,61 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static bool nfs4_read_stateid_changed(struct rpc_task *task, + struct nfs_pgio_args *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_READ)) + return false; + rpc_restart_call_prepare(task); + return true; +} + +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { dprintk("--> %s\n", __func__); if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - - return data->read_done_cb ? data->read_done_cb(task, data) : + if (nfs4_read_stateid_changed(task, &data->args)) + return -EAGAIN; + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_read_done_cb(task, data); } -static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { data->timestamp = jiffies; - data->read_done_cb = nfs4_read_done_cb; + data->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->header->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, - task); + task)) + return 0; + if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, + data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) + return -EIO; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + return -EIO; + return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; + trace_nfs4_write(data, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -3473,16 +4120,32 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data return 0; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +static bool nfs4_write_stateid_changed(struct rpc_task *task, + struct nfs_pgio_args *args) +{ + + if (!nfs4_error_stateid_expired(task->tk_status) || + nfs4_stateid_is_current(&args->stateid, + args->context, + args->lock_context, + FMODE_WRITE)) + return false; + rpc_restart_call_prepare(task); + return true; +} + +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb ? data->write_done_cb(task, data) : + if (nfs4_write_stateid_changed(task, &data->args)) + return -EAGAIN; + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_write_done_cb(task, data); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) { const struct nfs_pgio_header *hdr = data->header; @@ -3495,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->header->inode); @@ -3505,21 +4168,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag } else data->args.bitmask = server->cache_consistency_bitmask; - if (!data->write_done_cb) - data->write_done_cb = nfs4_write_done_cb; + if (!data->pgio_done_cb) + data->pgio_done_cb = nfs4_write_done_cb; data->res.server = server; data->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); -} - -static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -3534,6 +4189,7 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da { struct inode *inode = data->inode; + trace_nfs4_commit(data, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -3556,7 +4212,7 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } struct nfs4_renewdata { @@ -3585,7 +4241,14 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) struct nfs_client *clp = data->client; unsigned long timestamp = data->timestamp; - if (task->tk_status < 0) { + trace_nfs4_renew_async(clp, task->tk_status); + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + break; + default: /* Unless we're shutting down, schedule state recovery! */ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) return; @@ -3621,7 +4284,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, return -ENOMEM; data->client = clp; data->timestamp = jiffies; - return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, &nfs4_renew_ops, data); } @@ -3635,7 +4298,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) unsigned long now = jiffies; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status < 0) return status; do_renew_lease(clp, now); @@ -3644,9 +4307,7 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) static inline int nfs4_server_supports_acls(struct nfs_server *server) { - return (server->caps & NFS_CAP_ACLS) - && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) - && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); + return server->caps & NFS_CAP_ACLS; } /* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that @@ -3842,6 +4503,7 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl ssize_t ret; do { ret = __nfs4_get_acl_uncached(inode, buf, buflen); + trace_nfs4_get_acl(inode, ret); if (ret >= 0) break; ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); @@ -3921,14 +4583,166 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - __nfs4_proc_set_acl(inode, buf, buflen), + err = __nfs4_proc_set_acl(inode, buf, buflen); + trace_nfs4_set_acl(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + return err; +} + +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +static int _nfs4_get_security_label(struct inode *inode, void *buf, + size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct nfs4_label label = {0, 0, buflen, buf}; + + u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; + struct nfs4_getattr_arg arg = { + .fh = NFS_FH(inode), + .bitmask = bitmask, + }; + struct nfs4_getattr_res res = { + .fattr = &fattr, + .label = &label, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int ret; + + nfs_fattr_init(&fattr); + + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); + if (ret) + return ret; + if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) + return -ENOENT; + if (buflen < label.len) + return -ERANGE; + return 0; +} + +static int nfs4_get_security_label(struct inode *inode, void *buf, + size_t buflen) +{ + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) + return -EOPNOTSUPP; + + do { + err = _nfs4_get_security_label(inode, buf, buflen); + trace_nfs4_get_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_do_set_security_label(struct inode *inode, + struct nfs4_label *ilabel, + struct nfs_fattr *fattr, + struct nfs4_label *olabel) +{ + + struct iattr sattr = {0}; + struct nfs_server *server = NFS_SERVER(inode); + const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = &sattr, + .server = server, + .bitmask = bitmask, + .label = ilabel, + }; + struct nfs_setattrres res = { + .fattr = fattr, + .label = olabel, + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + int status; + + nfs4_stateid_copy(&arg.stateid, &zero_stateid); + + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); + if (status) + dprintk("%s failed: %d\n", __func__, status); + + return status; +} + +static int nfs4_do_set_security_label(struct inode *inode, + struct nfs4_label *ilabel, + struct nfs_fattr *fattr, + struct nfs4_label *olabel) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_set_security_label(inode, ilabel, + fattr, olabel); + trace_nfs4_set_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; } static int +nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen) +{ + struct nfs4_label ilabel, *olabel = NULL; + struct nfs_fattr fattr; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + int status; + + if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) + return -EOPNOTSUPP; + + nfs_fattr_init(&fattr); + + ilabel.pi = 0; + ilabel.lfs = 0; + ilabel.label = (char *)buf; + ilabel.len = buflen; + + cred = rpc_lookup_cred(); + if (IS_ERR(cred)) + return PTR_ERR(cred); + + olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (IS_ERR(olabel)) { + status = -PTR_ERR(olabel); + goto out; + } + + status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); + if (status == 0) + nfs_setsecurity(inode, &fattr, olabel); + + nfs4_label_free(olabel); +out: + put_rpccred(cred); + return status; +} +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ + + +static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = server->nfs_client; @@ -3945,15 +4759,25 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_OPENMODE: if (state == NULL) break; - nfs4_schedule_stateid_recovery(server, state); + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto recovery_failed; goto wait_on_recovery; case -NFS4ERR_EXPIRED: - if (state != NULL) - nfs4_schedule_stateid_recovery(server, state); + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(server, state) < 0) + goto recovery_failed; + } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; + case -NFS4ERR_MOVED: + if (nfs4_schedule_migration_recovery(server) < 0) + goto recovery_failed; + goto wait_on_recovery; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -3965,26 +4789,28 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); - task->tk_status = 0; - return -EAGAIN; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: - task->tk_status = 0; - return -EAGAIN; + goto restart_call; } task->tk_status = nfs4_map_errors(task->tk_status); return 0; +recovery_failed: + task->tk_status = -EIO; + return 0; wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + goto recovery_failed; +restart_call: task->tk_status = 0; return -EAGAIN; } @@ -3998,11 +4824,11 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, /* An impossible timestamp guarantees this value * will never match a generated boot time. */ verf[0] = 0; - verf[1] = (__be32)(NSEC_PER_SEC + 1); + verf[1] = cpu_to_be32(NSEC_PER_SEC + 1); } else { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - verf[0] = (__be32)nn->boot_time.tv_sec; - verf[1] = (__be32)nn->boot_time.tv_nsec; + verf[0] = cpu_to_be32(nn->boot_time.tv_sec); + verf[1] = cpu_to_be32(nn->boot_time.tv_nsec); } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } @@ -4028,15 +4854,33 @@ static unsigned int nfs4_init_uniform_client_string(const struct nfs_client *clp, char *buf, size_t len) { - char *nodename = clp->cl_rpcclient->cl_nodename; + const char *nodename = clp->cl_rpcclient->cl_nodename; if (nfs4_client_id_uniquifier[0] != '\0') - nodename = nfs4_client_id_uniquifier; + return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, + clp->cl_minorversion, + nfs4_client_id_uniquifier, + nodename); return scnprintf(buf, len, "Linux NFSv%u.%u %s", clp->rpc_ops->version, clp->cl_minorversion, nodename); } +/* + * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback + * services. Advertise one based on the address family of the + * clientaddr. + */ +static unsigned int +nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) +{ + if (strchr(clp->cl_ipaddr, ':') != NULL) + return scnprintf(buf, len, "tcp6"); + else + return scnprintf(buf, len, "tcp"); +} + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4078,12 +4922,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, setclientid.sc_name, sizeof(setclientid.sc_name)); /* cb_client4 */ - rcu_read_lock(); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, - sizeof(setclientid.sc_netid), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_NETID)); - rcu_read_unlock(); + setclientid.sc_netid_len = + nfs4_init_callback_netid(clp, + setclientid.sc_netid, + sizeof(setclientid.sc_netid)); setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); @@ -4092,6 +4934,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; } @@ -4108,27 +4951,18 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) { - struct nfs_fsinfo fsinfo; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], .rpc_argp = arg, - .rpc_resp = &fsinfo, .rpc_cred = cred, }; - unsigned long now; int status; dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - now = jiffies; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (status == 0) { - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = now; - spin_unlock(&clp->cl_lock); - } + trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -4150,12 +4984,19 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) if (!nfs4_sequence_done(task, &data->res.seq_res)) return; + trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { @@ -4171,7 +5012,6 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } -#if defined(CONFIG_NFS_V4_1) static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) { struct nfs4_delegreturndata *d_data; @@ -4183,12 +5023,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) &d_data->res.seq_res, task); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs4_delegreturn_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs4_delegreturn_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; @@ -4213,7 +5050,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->cache_consistency_bitmask; @@ -4253,6 +5090,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 int err; do { err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + trace_nfs4_delegreturn(inode, err); switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -4273,7 +5111,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable_unsafe(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; @@ -4317,6 +5155,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } request->fl_ops->fl_release_private(request); + request->fl_ops = NULL; out: return status; } @@ -4327,8 +5166,9 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock * int err; do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_getlk(state, cmd, request), + err = _nfs4_proc_getlk(state, cmd, request); + trace_nfs4_get_lock(request, state, cmd, err); + err = nfs4_handle_exception(NFS_SERVER(state->inode), err, &exception); } while (exception.retry); return err; @@ -4423,12 +5263,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) struct nfs4_unlockdata *calldata = data; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) - return; + goto out_wait; if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ - task->tk_action = NULL; - nfs4_sequence_done(task, &calldata->res.seq_res); - return; + goto out_no_action; } calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, @@ -4436,6 +5274,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); + return; +out_no_action: + task->tk_action = NULL; +out_wait: + nfs4_sequence_done(task, &calldata->res.seq_res); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4462,6 +5305,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .flags = RPC_TASK_ASYNC, }; + nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, + NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); + /* Ensure this is an unlock - when canceling a lock, the * canceled lock is passed in, and it won't be an unlock. */ @@ -4473,7 +5319,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4482,7 +5328,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs_inode *nfsi = NFS_I(state->inode); + struct inode *inode = state->inode; + struct nfs4_state_owner *sp = state->owner; + struct nfs_inode *nfsi = NFS_I(inode); struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; struct rpc_task *task; @@ -4492,18 +5340,23 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ request->fl_flags |= FL_EXISTS; + /* Exclude nfs_delegation_claim_locks() */ + mutex_lock(&sp->so_delegreturn_mutex); + /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); if (do_vfs_lock(request->fl_file, request) == -ENOENT) { up_read(&nfsi->rwsem); + mutex_unlock(&sp->so_delegreturn_mutex); goto out; } up_read(&nfsi->rwsem); + mutex_unlock(&sp->so_delegreturn_mutex); if (status != 0) goto out; /* Is this a delegated lock? */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - goto out; lsp = request->fl_u.nfs4_fl.owner; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) + goto out; seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; if (seqid == NULL) @@ -4516,6 +5369,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * rpc_put_task(task); out: request->fl_flags = fl_flags; + trace_nfs4_unlock(request, state, F_SETLK, status); return status; } @@ -4576,26 +5430,34 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) - return; + goto out_wait; /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } - data->arg.open_stateid = &state->stateid; + data->arg.open_stateid = &state->open_stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; } else data->arg.new_lock_owner = 0; + if (!nfs4_valid_open_stateid(state)) { + data->rpc_status = -EBADF; + task->tk_action = NULL; + goto out_release_open_seqid; + } data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, &data->res.seq_res, task) == 0) return; +out_release_open_seqid: nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); +out_wait: + nfs4_sequence_done(task, &data->res.seq_res); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } @@ -4693,7 +5555,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4731,6 +5593,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); + trace_nfs4_lock_reclaim(request, state, F_SETLK, err); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -4749,10 +5612,15 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request err = nfs4_set_lock_state(state, request); if (err != 0) return err; + if (!recover_lost_locks) { + set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags); + return 0; + } do { if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED); + trace_nfs4_lock_expired(request, state, F_SETLK, err); switch (err) { default: goto out; @@ -4783,13 +5651,19 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) list_for_each_entry(lsp, &state->lock_states, ls_locks) { if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - status = nfs41_test_stateid(server, &lsp->ls_stateid); + struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + + status = nfs41_test_stateid(server, + &lsp->ls_stateid, + cred); + trace_nfs4_test_lock_stateid(state, lsp, status); if (status != NFS_OK) { /* Free the stateid unless the server * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) nfs41_free_stateid(server, - &lsp->ls_stateid); + &lsp->ls_stateid, + cred); clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); ret = status; } @@ -4813,8 +5687,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { + struct nfs4_state_owner *sp = state->owner; struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; + unsigned int seq; int status = -ENOLCK; if ((fl_flags & FL_POSIX) && @@ -4836,9 +5712,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock status = do_vfs_lock(request->fl_file, request); goto out_unlock; } + seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); + up_read(&nfsi->rwsem); status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); if (status != 0) + goto out; + down_read(&nfsi->rwsem); + if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { + status = -NFS4ERR_DELAY; goto out_unlock; + } /* Note: we always want to sleep here! */ request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) @@ -4861,6 +5744,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * do { err = _nfs4_proc_setlk(state, cmd, request); + trace_nfs4_set_lock(request, state, cmd, err); if (err == -NFS4ERR_DENIED) err = -EAGAIN; err = nfs4_handle_exception(NFS_SERVER(state->inode), @@ -4927,69 +5811,54 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return status; } -int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) +int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; int err; err = nfs4_set_lock_state(state, fl); if (err != 0) - goto out; - do { - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - switch (err) { - default: - printk(KERN_ERR "NFS: %s: unhandled error " - "%d.\n", __func__, err); - case 0: - case -ESTALE: - goto out; - case -NFS4ERR_EXPIRED: - nfs4_schedule_stateid_recovery(server, state); - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - nfs4_schedule_lease_recovery(server->nfs_client); - goto out; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_DEADSESSION: - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); - goto out; - case -ERESTARTSYS: - /* - * The show must go on: exit, but mark the - * stateid as needing recovery. - */ - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - nfs4_schedule_stateid_recovery(server, state); - err = 0; - goto out; - case -ENOMEM: - case -NFS4ERR_DENIED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - err = 0; - goto out; - case -NFS4ERR_DELAY: - break; - } - err = nfs4_handle_exception(server, err, &exception); - } while (exception.retry); -out: - return err; + return err; + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + return nfs4_handle_delegation_recall_error(server, state, stateid, err); } struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; + struct nfs_release_lockowner_res res; + unsigned long timestamp; }; +static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + nfs40_setup_sequence(data->server, + &data->args.seq_args, &data->res.seq_res, task); + data->timestamp = jiffies; +} + +static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + struct nfs_server *server = data->server; + + nfs40_sequence_done(task, &data->res.seq_res); + + switch (task->tk_status) { + case 0: + renew_lease(server, data->timestamp); + break; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_LEASE_MOVED: + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) + rpc_restart_call_prepare(task); + } +} + static void nfs4_release_lockowner_release(void *calldata) { struct nfs_release_lockowner_data *data = calldata; @@ -4998,12 +5867,13 @@ static void nfs4_release_lockowner_release(void *calldata) } static const struct rpc_call_ops nfs4_release_lockowner_ops = { + .rpc_call_prepare = nfs4_release_lockowner_prepare, + .rpc_call_done = nfs4_release_lockowner_done, .rpc_release = nfs4_release_lockowner_release, }; -int nfs4_release_lockowner(struct nfs4_lock_state *lsp) +static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct nfs_server *server = lsp->ls_state->owner->so_server; struct nfs_release_lockowner_data *data; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], @@ -5011,6 +5881,7 @@ int nfs4_release_lockowner(struct nfs4_lock_state *lsp) if (server->nfs_client->cl_mvops->minor_version != 0) return -EINVAL; + data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; @@ -5019,7 +5890,10 @@ int nfs4_release_lockowner(struct nfs4_lock_state *lsp) data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->args.lock_owner.id = lsp->ls_seqid.owner_id; data->args.lock_owner.s_dev = server->s_dev; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; } @@ -5059,6 +5933,53 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, return len; } +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +static inline int nfs4_server_supports_labels(struct nfs_server *server) +{ + return server->caps & NFS_CAP_SECURITY_LABEL; +} + +static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, + const void *buf, size_t buflen, + int flags, int type) +{ + if (security_ismaclabel(key)) + return nfs4_set_security_label(dentry, buf, buflen); + + return -EOPNOTSUPP; +} + +static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, + void *buf, size_t buflen, int type) +{ + if (security_ismaclabel(key)) + return nfs4_get_security_label(dentry->d_inode, buf, buflen); + return -EOPNOTSUPP; +} + +static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, + size_t list_len, const char *name, + size_t name_len, int type) +{ + size_t len = 0; + + if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) { + len = security_inode_listsecurity(dentry->d_inode, NULL, 0); + if (list && len <= list_len) + security_inode_listsecurity(dentry->d_inode, list, len); + } + return len; +} + +static const struct xattr_handler nfs4_xattr_nfs4_label_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = nfs4_xattr_list_nfs4_label, + .get = nfs4_xattr_get_nfs4_label, + .set = nfs4_xattr_set_nfs4_label, +}; +#endif + + /* * nfs_fhget will use either the mounted_on_fileid or the fileid */ @@ -5082,7 +6003,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[2] = { + u32 bitmask[3] = { [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, }; struct nfs4_fs_locations_arg args = { @@ -5126,14 +6047,300 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_fs_locations(client, dir, name, fs_locations, page), + err = _nfs4_proc_fs_locations(client, dir, name, + fs_locations, page); + trace_nfs4_get_fs_locations(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; } -static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop returning + * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is + * appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .clientid = server->nfs_client->cl_clientid, + .fh = NFS_FH(inode), + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + .renew = 1, /* append RENEW */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + nfs_fattr_init(&locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + if (status) + return status; + + renew_lease(server, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID + * performing this operation is identified in the SEQUENCE + * operation in this compound. + * + * When the client supports GETATTR(fs_locations_info), it can + * be plumbed in here. + */ +static int _nfs41_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .fh = NFS_FH(inode), + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + nfs_fattr_init(&locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_get_locations - discover locations for a migrated FSID + * @inode: inode on FSID that is migrating + * @locations: result of query + * @page: buffer + * @cred: credential to use for this operation + * + * Returns NFS4_OK on success, a negative NFS4ERR status code if the + * operation failed, or a negative errno if a local error occurred. + * + * On success, "locations" is filled in, but if the server has + * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not + * asserted. + * + * -NFS4ERR_LEASE_MOVED is returned if the server still has leases + * from this client that require migration recovery. + */ +int nfs4_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { }; + int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(NFS_FH(inode), __func__); + + do { + status = ops->get_locations(inode, locations, page, cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop + * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation + * is appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + .clientid = clp->cl_clientid, + .renew = 1, /* append RENEW */ + }; + struct nfs4_fsid_present_res res = { + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status) + return status; + + do_renew_lease(clp, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing + * this operation is identified in the SEQUENCE operation in this + * compound. + */ +static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + }; + struct nfs4_fsid_present_res res = { + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_fsid_present - Is this FSID present or absent on server? + * @inode: inode on FSID to check + * @cred: credential to use for this operation + * + * Server indicates whether the FSID is present, moved, or not + * recognized. This operation is necessary to clear a LEASE_MOVED + * condition for this client ID. + * + * Returns NFS4_OK if the FSID is present on this server, + * -NFS4ERR_MOVED if the FSID is no longer present, a negative + * NFS4ERR code if some error occurred on the server, or a + * negative errno if a local failure occurred. + */ +int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { }; + int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(NFS_FH(inode), __func__); + + do { + status = ops->fsid_present(inode, cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + +/** + * If 'use_integrity' is true and the state managment nfs_client + * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient + * and the machine credential as per RFC3530bis and RFC5661 Security + * Considerations sections. Otherwise, just use the user cred with the + * filesystem's rpc_client. + */ +static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity) { int status; struct nfs4_secinfo_arg args = { @@ -5148,10 +6355,27 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_argp = &args, .rpc_resp = &res, }; + struct rpc_clnt *clnt = NFS_SERVER(dir)->client; + struct rpc_cred *cred = NULL; + + if (use_integrity) { + clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient; + cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client); + msg.rpc_cred = cred; + } dprintk("NFS call secinfo %s\n", name->name); - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + + nfs4_state_protect(NFS_SERVER(dir)->nfs_client, + NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); + + status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, + &res.seq_res, 0); dprintk("NFS reply secinfo: %d\n", status); + + if (cred) + put_rpccred(cred); + return status; } @@ -5161,8 +6385,23 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_secinfo(dir, name, flavors), + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(NFS_SERVER(dir)->nfs_client)) + err = _nfs4_proc_secinfo(dir, name, flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs4_proc_secinfo(dir, name, flavors, false); + + trace_nfs4_secinfo(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; @@ -5226,6 +6465,7 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_bind_conn_to_session(clp, status); if (status == 0) { if (memcmp(res.session->sess_id.data, clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5254,22 +6494,139 @@ out: } /* - * nfs4_proc_exchange_id() + * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map + * and operations we'd like to see to enable certain features in the allow map + */ +static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { + .how = SP4_MACH_CRED, + .enforce.u.words = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + }, + .allow.u.words = { + [0] = 1 << (OP_CLOSE) | + 1 << (OP_LOCKU) | + 1 << (OP_COMMIT), + [1] = 1 << (OP_SECINFO - 32) | + 1 << (OP_SECINFO_NO_NAME - 32) | + 1 << (OP_TEST_STATEID - 32) | + 1 << (OP_FREE_STATEID - 32) | + 1 << (OP_WRITE - 32) + } +}; + +/* + * Select the state protection mode for client `clp' given the server results + * from exchange_id in `sp'. * - * Returns zero, a negative errno, or a negative NFS4ERR status code. + * Returns 0 on success, negative errno otherwise. + */ +static int nfs4_sp4_select_mode(struct nfs_client *clp, + struct nfs41_state_protection *sp) +{ + static const u32 supported_enforce[NFS4_OP_MAP_NUM_WORDS] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + }; + unsigned int i; + + if (sp->how == SP4_MACH_CRED) { + /* Print state protect result */ + dfprintk(MOUNT, "Server SP4_MACH_CRED support:\n"); + for (i = 0; i <= LAST_NFS4_OP; i++) { + if (test_bit(i, sp->enforce.u.longs)) + dfprintk(MOUNT, " enforce op %d\n", i); + if (test_bit(i, sp->allow.u.longs)) + dfprintk(MOUNT, " allow op %d\n", i); + } + + /* make sure nothing is on enforce list that isn't supported */ + for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) { + if (sp->enforce.u.words[i] & ~supported_enforce[i]) { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + return -EINVAL; + } + } + + /* + * Minimal mode - state operations are allowed to use machine + * credential. Note this already happens by default, so the + * client doesn't have to do anything more than the negotiation. + * + * NOTE: we don't care if EXCHANGE_ID is in the list - + * we're already using the machine cred for exchange_id + * and will never use a different cred. + */ + if (test_bit(OP_BIND_CONN_TO_SESSION, sp->enforce.u.longs) && + test_bit(OP_CREATE_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) { + dfprintk(MOUNT, "sp4_mach_cred:\n"); + dfprintk(MOUNT, " minimal mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags); + } else { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + return -EINVAL; + } + + if (test_bit(OP_CLOSE, sp->allow.u.longs) && + test_bit(OP_LOCKU, sp->allow.u.longs)) { + dfprintk(MOUNT, " cleanup mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); + } + + if (test_bit(OP_SECINFO, sp->allow.u.longs) && + test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { + dfprintk(MOUNT, " secinfo mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags); + } + + if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) && + test_bit(OP_FREE_STATEID, sp->allow.u.longs)) { + dfprintk(MOUNT, " stateid mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags); + } + + if (test_bit(OP_WRITE, sp->allow.u.longs)) { + dfprintk(MOUNT, " write mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags); + } + + if (test_bit(OP_COMMIT, sp->allow.u.longs)) { + dfprintk(MOUNT, " commit mode enabled\n"); + set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags); + } + } + + return 0; +} + +/* + * _nfs4_proc_exchange_id() * - * Since the clientid has expired, all compounds using sessions - * associated with the stale clientid will be returning - * NFS4ERR_BADSESSION in the sequence operation, and will therefore - * be in some phase of session reset. + * Wrapper for EXCHANGE_ID operation. */ -int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, + u32 sp4_how) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { .verifier = &verifier, .client = clp, - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, +#ifdef CONFIG_NFS_V4_1_MIGRATION + .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID | + EXCHGID4_FLAG_SUPP_MOVED_MIGR, +#else + .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID, +#endif }; struct nfs41_exchange_id_res res = { 0 @@ -5309,10 +6666,30 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out_server_scope; } + switch (sp4_how) { + case SP4_NONE: + args.state_protect.how = SP4_NONE; + break; + + case SP4_MACH_CRED: + args.state_protect = nfs4_sp4_mach_cred_request; + break; + + default: + /* unsupported! */ + WARN_ON_ONCE(1); + status = -EINVAL; + goto out_server_scope; + } + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_exchange_id(clp, status); if (status == 0) status = nfs4_check_cl_exchange_flags(res.flags); + if (status == 0) + status = nfs4_sp4_select_mode(clp, &res.state_protect); + if (status == 0) { clp->cl_clientid = res.clientid; clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); @@ -5359,6 +6736,35 @@ out: return status; } +/* + * nfs4_proc_exchange_id() + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. + * + * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used. + */ +int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +{ + rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor; + int status; + + /* try SP4_MACH_CRED if krb5i/p */ + if (authflavor == RPC_AUTH_GSS_KRB5I || + authflavor == RPC_AUTH_GSS_KRB5P) { + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); + if (!status) + return 0; + } + + /* try SP4_NONE */ + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); +} + static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, struct rpc_cred *cred) { @@ -5370,6 +6776,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, int status; status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " "DESTROY_CLIENTID.", status, clp->cl_hostname); @@ -5407,7 +6814,7 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_preserve_clid) goto out; - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) put_rpccred(cred); @@ -5499,7 +6906,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; - nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); nfs4_set_sequence_privileged(&args.la_seq_args); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5526,17 +6933,14 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) */ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) { - struct nfs4_session *session = args->client->cl_session; - unsigned int mxrqst_sz = session->fc_target_max_rqst_sz, - mxresp_sz = session->fc_target_max_resp_sz; + unsigned int max_rqst_sz, max_resp_sz; + + max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; + max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; - if (mxrqst_sz == 0) - mxrqst_sz = NFS_MAX_FILE_IO_SIZE; - if (mxresp_sz == 0) - mxresp_sz = NFS_MAX_FILE_IO_SIZE; /* Fore channel attributes */ - args->fc_attrs.max_rqst_sz = mxrqst_sz; - args->fc_attrs.max_resp_sz = mxresp_sz; + args->fc_attrs.max_rqst_sz = max_rqst_sz; + args->fc_attrs.max_resp_sz = max_resp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; args->fc_attrs.max_reqs = max_session_slots; @@ -5636,6 +7040,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_create_session(clp, status); if (!status) { /* Verify the session's negotiated channel_attrs values */ @@ -5699,6 +7104,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, return status; status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + trace_nfs4_destroy_session(session->clp, status); if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " @@ -5748,6 +7154,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp)) return; + trace_nfs4_sequence(clp, task->tk_status); if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (atomic_read(&clp->cl_count) == 1) @@ -5795,7 +7202,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; if (!atomic_inc_not_zero(&clp->cl_count)) @@ -5805,7 +7212,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, nfs_put_client(clp); return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_init_sequence(&calldata->args, &calldata->res, 0); if (is_privileged) nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; @@ -5900,6 +7307,7 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) if (!nfs41_sequence_done(task, res)) return; + trace_nfs4_reclaim_complete(clp, task->tk_status); if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); return; @@ -5923,12 +7331,14 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { /* * Issue a global reclaim complete. */ -static int nfs41_proc_reclaim_complete(struct nfs_client *clp) +static int nfs41_proc_reclaim_complete(struct nfs_client *clp, + struct rpc_cred *cred) { struct nfs4_reclaim_complete_data *calldata; struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], + .rpc_cred = cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clp->cl_rpcclient, @@ -5945,7 +7355,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->clp = clp; calldata->arg.one_fs = 0; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); nfs4_set_sequence_privileged(&calldata->arg.seq_args); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; @@ -5995,8 +7405,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; struct nfs4_state *state = NULL; + unsigned long timeo, now, giveup; - dprintk("--> %s\n", __func__); + dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); if (!nfs41_sequence_done(task, &lgp->res.seq_res)) goto out; @@ -6004,9 +7415,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: goto out; + /* + * NFS4ERR_LAYOUTTRYLATER is a conflict with another client + * (or clients) writing to the same RAID stripe + */ case -NFS4ERR_LAYOUTTRYLATER: + /* + * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall + * existing layout before getting a new one). + */ case -NFS4ERR_RECALLCONFLICT: - task->tk_status = -NFS4ERR_DELAY; + timeo = rpc_get_timeout(task->tk_client); + giveup = lgp->args.timestamp + timeo; + now = jiffies; + if (time_after(giveup, now)) { + unsigned long delay; + + /* Delay for: + * - Not less then NFS4_POLL_RETRY_MIN. + * - One last time a jiffie before we give up + * - exponential backoff (time_now minus start_attempt) + */ + delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN, + min((giveup - now - 1), + now - lgp->args.timestamp)); + + dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", + __func__, delay); + rpc_delay(task, delay); + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out; /* Do not call nfs4_async_handle_error() */ + } break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: @@ -6079,11 +7519,13 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct inode *inode = lgp->args.inode; + struct nfs_server *server = NFS_SERVER(inode); size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); nfs4_free_pages(lgp->args.layout.pages, max_pages); + pnfs_put_layout_hdr(NFS_I(inode)->layout); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6098,13 +7540,15 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { struct pnfs_layout_segment * nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { - struct nfs_server *server = NFS_SERVER(lgp->args.inode); + struct inode *inode = lgp->args.inode; + struct nfs_server *server = NFS_SERVER(inode); size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], .rpc_argp = &lgp->args, .rpc_resp = &lgp->res, + .rpc_cred = lgp->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = server->client, @@ -6124,17 +7568,27 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) return ERR_PTR(-ENOMEM); } lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->args.timestamp = jiffies; lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; - nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); + + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; - if (status == 0) + trace_nfs4_layoutget(lgp->args.ctx, + &lgp->args.range, + &lgp->res.range, + status); + /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ + if (status == 0 && lgp->res.layoutp->len) lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); @@ -6166,7 +7620,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; server = NFS_SERVER(lrp->args.inode); - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + switch (task->tk_status) { + default: + task->tk_status = 0; + case 0: + break; + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) + break; rpc_restart_call_prepare(task); return; } @@ -6202,9 +7663,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], .rpc_argp = &lrp->args, .rpc_resp = &lrp->res, + .rpc_cred = lrp->cred, }; struct rpc_task_setup task_setup_data = { - .rpc_client = lrp->clp->cl_rpcclient, + .rpc_client = NFS_SERVER(lrp->args.inode)->client, .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, @@ -6212,11 +7674,12 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) int status; dprintk("--> %s\n", __func__); - nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); status = task->tk_status; + trace_nfs4_layoutreturn(lrp->args.inode, status); dprintk("<-- %s status=%d\n", __func__, status); rpc_put_task(task); return status; @@ -6271,7 +7734,9 @@ int nfs4_proc_getdevicelist(struct nfs_server *server, EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); static int -_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +_nfs4_proc_getdeviceinfo(struct nfs_server *server, + struct pnfs_device *pdev, + struct rpc_cred *cred) { struct nfs4_getdeviceinfo_args args = { .pdev = pdev, @@ -6283,6 +7748,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], .rpc_argp = &args, .rpc_resp = &res, + .rpc_cred = cred, }; int status; @@ -6293,14 +7759,16 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) return status; } -int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) +int nfs4_proc_getdeviceinfo(struct nfs_server *server, + struct pnfs_device *pdev, + struct rpc_cred *cred) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_proc_getdeviceinfo(server, pdev), + _nfs4_proc_getdeviceinfo(server, pdev, cred), &exception); } while (exception.retry); return err; @@ -6334,10 +7802,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) case -NFS4ERR_BADLAYOUT: /* no layout */ case -NFS4ERR_GRACE: /* loca_recalim always false */ task->tk_status = 0; - break; case 0: - nfs_post_op_update_inode_force_wcc(data->args.inode, - data->res.fattr); break; default: if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { @@ -6350,22 +7815,10 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; - struct pnfs_layout_segment *lseg, *tmp; - unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); - /* Matched by references in pnfs_set_layoutcommit */ - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { - list_del_init(&lseg->pls_lc_list); - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, - &lseg->pls_flags)) - pnfs_put_lseg(lseg); - } - - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); - + nfs_post_op_update_inode_force_wcc(data->args.inode, + data->res.fattr); put_rpccred(data->cred); kfree(data); } @@ -6402,7 +7855,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -6412,15 +7865,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) if (status != 0) goto out; status = task->tk_status; + trace_nfs4_layoutcommit(data->args.inode, status); out: dprintk("%s: status %d\n", __func__, status); rpc_put_task(task); return status; } +/** + * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if + * possible) as per RFC3530bis and RFC5661 Security Considerations sections + */ static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) + struct nfs_fsinfo *info, + struct nfs4_secinfo_flavors *flavors, bool use_integrity) { struct nfs41_secinfo_no_name_args args = { .style = SECINFO_STYLE_CURRENT_FH, @@ -6433,7 +7892,25 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + struct rpc_clnt *clnt = server->client; + struct rpc_cred *cred = NULL; + int status; + + if (use_integrity) { + clnt = server->nfs_client->cl_rpcclient; + cred = nfs4_get_clid_cred(server->nfs_client); + msg.rpc_cred = cred; + } + + dprintk("--> %s\n", __func__); + status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, + &res.seq_res, 0); + dprintk("<-- %s status=%d\n", __func__, status); + + if (cred) + put_rpccred(cred); + + return status; } static int @@ -6443,11 +7920,28 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs4_exception exception = { }; int err; do { - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + /* first try using integrity protection */ + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(server->nfs_client)) + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, + flavors, false); + switch (err) { case 0: case -NFS4ERR_WRONGSEC: - case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: goto out; default: err = nfs4_handle_exception(server, err, &exception); @@ -6463,8 +7957,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, { int err; struct page *page; - rpc_authflavor_t flavor; + rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; struct nfs4_secinfo_flavors *flavors; + struct nfs4_secinfo4 *secinfo; + int i; page = alloc_page(GFP_KERNEL); if (!page) { @@ -6479,16 +7975,41 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ - if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { err = nfs4_find_root_sec(server, fhandle, info); goto out_freepage; } if (err) goto out_freepage; - flavor = nfs_find_best_sec(flavors); - if (err == 0) - err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + for (i = 0; i < flavors->num_flavors; i++) { + secinfo = &flavors->flavors[i]; + + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + flavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + break; + default: + flavor = RPC_AUTH_MAXFLAVOR; + break; + } + + if (!nfs_auth_info_match(&server->auth_info, flavor)) + flavor = RPC_AUTH_MAXFLAVOR; + + if (flavor != RPC_AUTH_MAXFLAVOR) { + err = nfs4_lookup_root_sec(server, fhandle, + info, flavor); + if (!err) + break; + } + } + + if (flavor == RPC_AUTH_MAXFLAVOR) + err = -EPERM; out_freepage: put_page(page); @@ -6498,7 +8019,9 @@ out: return err; } -static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int _nfs41_test_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) { int status; struct nfs41_test_stateid_args args = { @@ -6509,12 +8032,17 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], .rpc_argp = &args, .rpc_resp = &res, + .rpc_cred = cred, }; + struct rpc_clnt *rpc_client = server->client; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &rpc_client, &msg); dprintk("NFS call test_stateid %p\n", stateid); - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(server->client, server, &msg, + status = nfs4_call_sync_sequence(rpc_client, server, &msg, &args.seq_args, &res.seq_res); if (status != NFS_OK) { dprintk("NFS reply test_stateid: failed, %d\n", status); @@ -6529,17 +8057,20 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) * * @server: server / transport on which to perform the operation * @stateid: state ID to test + * @cred: credential * * Returns NFS_OK if the server recognizes that "stateid" is valid. * Otherwise a negative NFS4ERR value is returned if the operation * failed or the state ID is not currently valid. */ -static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int nfs41_test_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) { struct nfs4_exception exception = { }; int err; do { - err = _nfs41_test_stateid(server, stateid); + err = _nfs41_test_stateid(server, stateid, cred); if (err != -NFS4ERR_DELAY) break; nfs4_handle_exception(server, err, &exception); @@ -6547,26 +8078,81 @@ static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) return err; } -static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) -{ - struct nfs41_free_stateid_args args = { - .stateid = stateid, - }; +struct nfs_free_stateid_data { + struct nfs_server *server; + struct nfs41_free_stateid_args args; struct nfs41_free_stateid_res res; +}; + +static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_free_stateid_data *data = calldata; + nfs41_setup_sequence(nfs4_get_session(data->server), + &data->args.seq_args, + &data->res.seq_res, + task); +} + +static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) +{ + struct nfs_free_stateid_data *data = calldata; + + nfs41_sequence_done(task, &data->res.seq_res); + + switch (task->tk_status) { + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN) + rpc_restart_call_prepare(task); + } +} + +static void nfs41_free_stateid_release(void *calldata) +{ + kfree(calldata); +} + +static const struct rpc_call_ops nfs41_free_stateid_ops = { + .rpc_call_prepare = nfs41_free_stateid_prepare, + .rpc_call_done = nfs41_free_stateid_done, + .rpc_release = nfs41_free_stateid_release, +}; + +static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred, + bool privileged) +{ struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_cred = cred, }; - int status; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs41_free_stateid_ops, + .flags = RPC_TASK_ASYNC, + }; + struct nfs_free_stateid_data *data; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &task_setup.rpc_client, &msg); dprintk("NFS call free_stateid %p\n", stateid); - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(server->client, server, &msg, - &args.seq_args, &res.seq_res); - dprintk("NFS reply free_stateid: %d\n", status); - return status; + data = kmalloc(sizeof(*data), GFP_NOFS); + if (!data) + return ERR_PTR(-ENOMEM); + data->server = server; + nfs4_stateid_copy(&data->args.stateid, stateid); + + task_setup.callback_data = data; + + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + if (privileged) + nfs4_set_sequence_privileged(&data->args.seq_args); + + return rpc_run_task(&task_setup); } /** @@ -6574,21 +8160,39 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) * * @server: server / transport on which to perform the operation * @stateid: state ID to release + * @cred: credential * * Returns NFS_OK if the server freed "stateid". Otherwise a * negative NFS4ERR value is returned. */ -static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) +static int nfs41_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + struct rpc_cred *cred) { - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_free_stateid(server, stateid); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); - } while (exception.retry); - return err; + struct rpc_task *task; + int ret; + + task = _nfs41_free_stateid(server, stateid, cred, true); + if (IS_ERR(task)) + return PTR_ERR(task); + ret = rpc_wait_for_completion_task(task); + if (!ret) + ret = task->tk_status; + rpc_put_task(task); + return ret; +} + +static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) +{ + struct rpc_task *task; + struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + + task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); + nfs4_free_lock_state(server, lsp); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } static bool nfs41_match_stateid(const nfs4_stateid *s1, @@ -6620,7 +8224,6 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, .detect_trunking = nfs40_discover_server_trunking, }; @@ -6631,7 +8234,6 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, .detect_trunking = nfs41_discover_server_trunking, }; @@ -6643,7 +8245,6 @@ static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, }; #if defined(CONFIG_NFS_V4_1) @@ -6653,7 +8254,6 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .recover_open = nfs41_open_expired, .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, }; #endif /* CONFIG_NFS_V4_1 */ @@ -6671,22 +8271,73 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { }; #endif +static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = { + .get_locations = _nfs40_proc_get_locations, + .fsid_present = _nfs40_proc_fsid_present, +}; + +#if defined(CONFIG_NFS_V4_1) +static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = { + .get_locations = _nfs41_proc_get_locations, + .fsid_present = _nfs41_proc_fsid_present, +}; +#endif /* CONFIG_NFS_V4_1 */ + static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, - .call_sync = _nfs4_call_sync, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK, + .init_client = nfs40_init_client, + .shutdown_client = nfs40_shutdown_client, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, + .free_lock_state = nfs4_release_lockowner, + .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, + .mig_recovery_ops = &nfs40_mig_recovery_ops, }; #if defined(CONFIG_NFS_V4_1) static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, - .call_sync = nfs4_call_sync_sequence, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, + .free_lock_state = nfs41_free_lock_state, + .call_sync_ops = &nfs41_call_sync_ops, + .reboot_recovery_ops = &nfs41_reboot_recovery_ops, + .nograce_recovery_ops = &nfs41_nograce_recovery_ops, + .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, +}; +#endif + +#if defined(CONFIG_NFS_V4_2) +static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { + .minor_version = 2, + .init_caps = NFS_CAP_READDIRPLUS + | NFS_CAP_ATOMIC_OPEN + | NFS_CAP_CHANGE_ATTR + | NFS_CAP_POSIX_LOCK + | NFS_CAP_STATEID_NFSV41 + | NFS_CAP_ATOMIC_OPEN_V1, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, + .match_stateid = nfs41_match_stateid, + .find_root_sec = nfs41_find_root_sec, + .free_lock_state = nfs41_free_lock_state, + .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, @@ -6698,9 +8349,12 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #if defined(CONFIG_NFS_V4_1) [1] = &nfs_v4_1_minor_ops, #endif +#if defined(CONFIG_NFS_V4_2) + [2] = &nfs_v4_2_minor_ops, +#endif }; -const struct inode_operations nfs4_dir_inode_operations = { +static const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, @@ -6749,7 +8403,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_setup = nfs4_proc_unlink_setup, .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, - .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, @@ -6764,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, + .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, .read_setup = nfs4_proc_read_setup, - .read_pageio_init = pnfs_pageio_init_read, - .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_pageio_init = pnfs_pageio_init_write, - .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, @@ -6797,6 +8447,9 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { const struct xattr_handler *nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + &nfs4_xattr_nfs4_label_handler, +#endif NULL }; diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index ebda5f4a031..e799dc3c3b1 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -23,6 +23,14 @@ #define NFSDBG_FACILITY NFSDBG_STATE +static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue) +{ + tbl->highest_used_slotid = NFS4_NO_SLOT; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue); + init_completion(&tbl->complete); +} + /* * nfs4_shrink_slot_table - free retired slots from the slot table */ @@ -44,6 +52,17 @@ static void nfs4_shrink_slot_table(struct nfs4_slot_table *tbl, u32 newsize) } } +/** + * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete + * @tbl - controlling slot table + * + */ +void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl) +{ + if (nfs4_slot_tbl_draining(tbl)) + complete(&tbl->complete); +} + /* * nfs4_free_slot - free a slot and efficiently update slot table. * @@ -73,10 +92,10 @@ void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) tbl->highest_used_slotid = new_max; else { tbl->highest_used_slotid = NFS4_NO_SLOT; - nfs4_session_drain_complete(tbl->session, tbl); + nfs4_slot_tbl_drain_complete(tbl); } } - dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + dprintk("%s: slotid %u highest_used_slotid %u\n", __func__, slotid, tbl->highest_used_slotid); } @@ -146,9 +165,9 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl) ret->generation = tbl->generation; out: - dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, - !IS_ERR(ret) ? ret->slot_nr : -1); + !IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT); return ret; } @@ -191,7 +210,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, { int ret; - dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, + dprintk("--> %s: max_reqs=%u, tbl->max_slots %u\n", __func__, max_reqs, tbl->max_slots); if (max_reqs > NFS4_MAX_SLOT_TABLE) @@ -205,18 +224,45 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue); spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + dprintk("%s: tbl=%p slots=%p max_slots=%u\n", __func__, tbl, tbl->slots, tbl->max_slots); out: dprintk("<-- %s: return %d\n", __func__, ret); return ret; } -/* Destroy the slot table */ -static void nfs4_destroy_slot_tables(struct nfs4_session *session) +/* + * nfs4_release_slot_table - release all slot table entries + */ +static void nfs4_release_slot_table(struct nfs4_slot_table *tbl) +{ + nfs4_shrink_slot_table(tbl, 0); +} + +/** + * nfs4_shutdown_slot_table - release resources attached to a slot table + * @tbl: slot table to shut down + * + */ +void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl) { - nfs4_shrink_slot_table(&session->fc_slot_table, 0); - nfs4_shrink_slot_table(&session->bc_slot_table, 0); + nfs4_release_slot_table(tbl); + rpc_destroy_wait_queue(&tbl->slot_tbl_waitq); +} + +/** + * nfs4_setup_slot_table - prepare a stand-alone slot table for use + * @tbl: slot table to set up + * @max_reqs: maximum number of requests allowed + * @queue: name to give RPC wait queue + * + * Returns zero on success, or a negative errno. + */ +int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs, + const char *queue) +{ + nfs4_init_slot_table(tbl, queue); + return nfs4_realloc_slot_table(tbl, max_reqs, 0); } static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) @@ -226,7 +272,7 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot) struct nfs4_slot *slot = pslot; struct nfs4_slot_table *tbl = slot->table; - if (nfs4_session_draining(tbl->session) && !args->sa_privileged) + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) return false; slot->generation = tbl->generation; args->sa_slot = slot; @@ -273,6 +319,8 @@ void nfs41_wake_slot_table(struct nfs4_slot_table *tbl) } } +#if defined(CONFIG_NFS_V4_1) + static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl, u32 target_highest_slotid) { @@ -383,6 +431,12 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); } +static void nfs4_release_session_slot_tables(struct nfs4_session *session) +{ + nfs4_release_slot_table(&session->fc_slot_table); + nfs4_release_slot_table(&session->bc_slot_table); +} + /* * Initialize or reset the forechannel and backchannel tables */ @@ -405,43 +459,38 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses) if (status && tbl->slots == NULL) /* Fore and back channel share a connection so get * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); + nfs4_release_session_slot_tables(ses); return status; } struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) { struct nfs4_session *session; - struct nfs4_slot_table *tbl; session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); if (!session) return NULL; - tbl = &session->fc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); - init_completion(&tbl->complete); - - tbl = &session->bc_slot_table; - tbl->highest_used_slotid = NFS4_NO_SLOT; - spin_lock_init(&tbl->slot_tbl_lock); - rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); - init_completion(&tbl->complete); - + nfs4_init_slot_table(&session->fc_slot_table, "ForeChannel Slot table"); + nfs4_init_slot_table(&session->bc_slot_table, "BackChannel Slot table"); session->session_state = 1<<NFS4_SESSION_INITING; session->clp = clp; return session; } +static void nfs4_destroy_session_slot_tables(struct nfs4_session *session) +{ + nfs4_shutdown_slot_table(&session->fc_slot_table); + nfs4_shutdown_slot_table(&session->bc_slot_table); +} + void nfs4_destroy_session(struct nfs4_session *session) { struct rpc_xprt *xprt; struct rpc_cred *cred; - cred = nfs4_get_exchange_id_cred(session->clp); + cred = nfs4_get_clid_cred(session->clp); nfs4_proc_destroy_session(session, cred); if (cred) put_rpccred(cred); @@ -452,7 +501,7 @@ void nfs4_destroy_session(struct nfs4_session *session) dprintk("%s Destroy backchannel for xprt %p\n", __func__, xprt); xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); - nfs4_destroy_slot_tables(session); + nfs4_destroy_session_slot_tables(session); kfree(session); } @@ -478,48 +527,12 @@ static int nfs41_check_session_ready(struct nfs_client *clp) return 0; } -int nfs4_init_session(struct nfs_server *server) +int nfs4_init_session(struct nfs_client *clp) { - struct nfs_client *clp = server->nfs_client; - struct nfs4_session *session; - unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE; - unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE; - if (!nfs4_has_session(clp)) return 0; - if (server->rsize != 0) - target_max_resp_sz = server->rsize; - target_max_resp_sz += nfs41_maxread_overhead; - - if (server->wsize != 0) - target_max_rqst_sz = server->wsize; - target_max_rqst_sz += nfs41_maxwrite_overhead; - - session = clp->cl_session; - spin_lock(&clp->cl_lock); - if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) { - /* Initialise targets and channel attributes */ - session->fc_target_max_rqst_sz = target_max_rqst_sz; - session->fc_attrs.max_rqst_sz = target_max_rqst_sz; - session->fc_target_max_resp_sz = target_max_resp_sz; - session->fc_attrs.max_resp_sz = target_max_resp_sz; - } else { - /* Just adjust the targets */ - if (target_max_rqst_sz > session->fc_target_max_rqst_sz) { - session->fc_target_max_rqst_sz = target_max_rqst_sz; - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - } - if (target_max_resp_sz > session->fc_target_max_resp_sz) { - session->fc_target_max_resp_sz = target_max_resp_sz; - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - } - } - spin_unlock(&clp->cl_lock); - - if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) - nfs4_schedule_lease_recovery(clp); - + clear_bit(NFS4_SESSION_INITING, &clp->cl_session->session_state); return nfs41_check_session_ready(clp); } @@ -549,4 +562,4 @@ int nfs4_init_ds_session(struct nfs_client *clp, unsigned long lease_time) } EXPORT_SYMBOL_GPL(nfs4_init_ds_session); - +#endif /* defined(CONFIG_NFS_V4_1) */ diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 6f3cb39386d..b34ada9bc6a 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -8,7 +8,7 @@ #define __LINUX_FS_NFS_NFS4SESSION_H /* maximum number of slots to use */ -#define NFS4_DEF_SLOT_TABLE_SIZE (16U) +#define NFS4_DEF_SLOT_TABLE_SIZE (64U) #define NFS4_MAX_SLOT_TABLE (1024U) #define NFS4_NO_SLOT ((u32)-1) @@ -25,6 +25,10 @@ struct nfs4_slot { }; /* Sessions */ +enum nfs4_slot_tbl_state { + NFS4_SLOT_TBL_DRAINING, +}; + #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) struct nfs4_slot_table { struct nfs4_session *session; /* Parent session */ @@ -43,6 +47,7 @@ struct nfs4_slot_table { unsigned long generation; /* Generation counter for target_highest_slotid */ struct completion complete; + unsigned long slot_tbl_state; }; /* @@ -61,20 +66,28 @@ struct nfs4_session { struct nfs4_channel_attrs bc_attrs; struct nfs4_slot_table bc_slot_table; struct nfs_client *clp; - /* Create session arguments */ - unsigned int fc_target_max_rqst_sz; - unsigned int fc_target_max_resp_sz; }; enum nfs4_session_state { NFS4_SESSION_INITING, - NFS4_SESSION_DRAINING, }; -#if defined(CONFIG_NFS_V4_1) +extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, + unsigned int max_reqs, const char *queue); +extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl); extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl); extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot); +extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl); +bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, + struct nfs4_slot *slot); +void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); +static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl) +{ + return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); +} + +#if defined(CONFIG_NFS_V4_1) extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl, @@ -85,21 +98,9 @@ extern int nfs4_setup_session_slot_tables(struct nfs4_session *ses); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern void nfs4_destroy_session(struct nfs4_session *session); -extern int nfs4_init_session(struct nfs_server *server); +extern int nfs4_init_session(struct nfs_client *clp); extern int nfs4_init_ds_session(struct nfs_client *, unsigned long); -extern void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl); - -static inline bool nfs4_session_draining(struct nfs4_session *session) -{ - return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state); -} - -bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl, - struct nfs4_slot *slot); -void nfs41_wake_slot_table(struct nfs4_slot_table *tbl); - /* * Determine if sessions are in use. */ @@ -117,9 +118,19 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) return 0; } +#ifdef CONFIG_CRC32 +/* + * nfs_session_id_hash - calculate the crc32 hash for the session id + * @session - pointer to session + */ +#define nfs_session_id_hash(sess_id) \ + (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) +#else +#define nfs_session_id_hash(session) (0) +#endif #else /* defined(CONFIG_NFS_V4_1) */ -static inline int nfs4_init_session(struct nfs_server *server) +static inline int nfs4_init_session(struct nfs_client *clp) { return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9448c579d41..848f6853c59 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp, clp->cl_confirm = clid.confirm; status = nfs40_walk_client_list(clp, result, cred); - switch (status) { - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - case 0: + if (status == 0) { /* Sustain the lease, even if it's empty. If the clientid4 * goes stale it's of no use for trunking discovery. */ nfs4_schedule_state_renewal(*result); - break; } - out: return status; } @@ -159,13 +154,14 @@ struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) return cred; } -static void nfs4_clear_machine_cred(struct nfs_client *clp) +static void nfs4_root_machine_cred(struct nfs_client *clp) { - struct rpc_cred *cred; + struct rpc_cred *cred, *new; + new = rpc_lookup_machine_cred(NULL); spin_lock(&clp->cl_lock); cred = clp->cl_machine_cred; - clp->cl_machine_cred = NULL; + clp->cl_machine_cred = new; spin_unlock(&clp->cl_lock); if (cred != NULL) put_rpccred(cred); @@ -219,67 +215,36 @@ out: return cred; } -#if defined(CONFIG_NFS_V4_1) - -static int nfs41_setup_state_renewal(struct nfs_client *clp) +static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) { - int status; - struct nfs_fsinfo fsinfo; - - if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { - nfs4_schedule_state_renewal(clp); - return 0; - } - - status = nfs4_proc_get_lease_time(clp, &fsinfo); - if (status == 0) { - /* Update lease time and schedule renewal */ - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = jiffies; - spin_unlock(&clp->cl_lock); - - nfs4_schedule_state_renewal(clp); + if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { + spin_lock(&tbl->slot_tbl_lock); + nfs41_wake_slot_table(tbl); + spin_unlock(&tbl->slot_tbl_lock); } - - return status; } -/* - * Back channel returns NFS4ERR_DELAY for new requests when - * NFS4_SESSION_DRAINING is set so there is no work to be done when draining - * is ended. - */ static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl; - if (ses == NULL) + if (clp->cl_slot_tbl) { + nfs4_end_drain_slot_table(clp->cl_slot_tbl); return; - tbl = &ses->fc_slot_table; - if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - spin_lock(&tbl->slot_tbl_lock); - nfs41_wake_slot_table(tbl); - spin_unlock(&tbl->slot_tbl_lock); } -} -/* - * Signal state manager thread if session fore channel is drained - */ -void nfs4_session_drain_complete(struct nfs4_session *session, - struct nfs4_slot_table *tbl) -{ - if (nfs4_session_draining(session)) - complete(&tbl->complete); + if (ses != NULL) { + nfs4_end_drain_slot_table(&ses->bc_slot_table); + nfs4_end_drain_slot_table(&ses->fc_slot_table); + } } -static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) +static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { + set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { - INIT_COMPLETION(tbl->complete); + reinit_completion(&tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); } @@ -292,13 +257,41 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) struct nfs4_session *ses = clp->cl_session; int ret = 0; - set_bit(NFS4_SESSION_DRAINING, &ses->session_state); + if (clp->cl_slot_tbl) + return nfs4_drain_slot_tbl(clp->cl_slot_tbl); + /* back channel */ - ret = nfs4_wait_on_slot_tbl(&ses->bc_slot_table); + ret = nfs4_drain_slot_tbl(&ses->bc_slot_table); if (ret) return ret; /* fore channel */ - return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); + return nfs4_drain_slot_tbl(&ses->fc_slot_table); +} + +#if defined(CONFIG_NFS_V4_1) + +static int nfs41_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + + if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { + nfs4_schedule_state_renewal(clp); + return 0; + } + + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } + + return status; } static void nfs41_finish_session_reset(struct nfs_client *clp) @@ -358,62 +351,21 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, return nfs41_walk_client_list(clp, result, cred); } -struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) -{ - struct rpc_cred *cred; - - spin_lock(&clp->cl_lock); - cred = nfs4_get_machine_cred_locked(clp); - spin_unlock(&clp->cl_lock); - return cred; -} - #endif /* CONFIG_NFS_V4_1 */ -static struct rpc_cred * -nfs4_get_setclientid_cred_server(struct nfs_server *server) -{ - struct nfs_client *clp = server->nfs_client; - struct rpc_cred *cred = NULL; - struct nfs4_state_owner *sp; - struct rb_node *pos; - - spin_lock(&clp->cl_lock); - pos = rb_first(&server->state_owners); - if (pos != NULL) { - sp = rb_entry(pos, struct nfs4_state_owner, so_server_node); - cred = get_rpccred(sp->so_cred); - } - spin_unlock(&clp->cl_lock); - return cred; -} - /** - * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation + * nfs4_get_clid_cred - Acquire credential for a setclientid operation * @clp: client state handle * * Returns an rpc_cred with reference count bumped, or NULL. */ -struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) +struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp) { - struct nfs_server *server; struct rpc_cred *cred; spin_lock(&clp->cl_lock); cred = nfs4_get_machine_cred_locked(clp); spin_unlock(&clp->cl_lock); - if (cred != NULL) - goto out; - - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - cred = nfs4_get_setclientid_cred_server(server); - if (cred != NULL) - break; - } - rcu_read_unlock(); - -out: return cred; } @@ -523,6 +475,8 @@ nfs4_alloc_state_owner(struct nfs_server *server, nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); + seqcount_init(&sp->so_reclaim_seqcount); + mutex_init(&sp->so_delegreturn_mutex); return sp; } @@ -702,6 +656,8 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) list_for_each_entry(state, &nfsi->open_states, inode_states) { if (state->owner != owner) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (atomic_inc_not_zero(&state->count)) return state; } @@ -934,6 +890,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ */ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { + struct nfs_server *server; struct nfs4_state *state; if (lsp == NULL) @@ -945,11 +902,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); + server = state->owner->so_server; if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - if (nfs4_release_lockowner(lsp) == 0) - return; - } - nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp); + struct nfs_client *clp = server->nfs_client; + + clp->cl_mvops->free_lock_state(server, lsp); + } else + nfs4_free_lock_state(server, lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -990,13 +949,14 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, +static int nfs4_copy_lock_stateid(nfs4_stateid *dst, + struct nfs4_state *state, const struct nfs_lockowner *lockowner) { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; pid_t fl_pid; - bool ret = false; + int ret = -ENOENT; if (lockowner == NULL) @@ -1009,9 +969,11 @@ static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { + if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) + ret = -EIO; + else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); - ret = true; + ret = 0; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -1021,11 +983,15 @@ out: static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + const nfs4_stateid *src; int seq; do { + src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - nfs4_stateid_copy(dst, &state->stateid); + if (test_bit(NFS_OPEN_STATE, &state->flags)) + src = &state->open_stateid; + nfs4_stateid_copy(dst, src); } while (read_seqretry(&state->seqlock, seq)); } @@ -1033,14 +999,30 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) * Byte-range lock aware utility to initialize the stateid of read/write * requests. */ -void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, +int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, fmode_t fmode, const struct nfs_lockowner *lockowner) { - if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) - return; - if (nfs4_copy_lock_stateid(dst, state, lockowner)) - return; + int ret = nfs4_copy_lock_stateid(dst, state, lockowner); + if (ret == -EIO) + /* A lost lock - don't even consider delegations */ + goto out; + /* returns true if delegation stateid found and copied */ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) { + ret = 0; + goto out; + } + if (ret != -ENOENT) + /* nfs4_copy_delegation_stateid() didn't over-write + * dst, so it still has the lock stateid which we now + * choose to use. + */ + goto out; nfs4_copy_open_stateid(dst, state); + ret = 0; +out: + if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) + dst->seqid = 0; + return ret; } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) @@ -1084,7 +1066,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) /* * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - - * see comments nfs_fs.h:seqid_mutating_error() + * see comments nfs4.h:seqid_mutating_error() */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { @@ -1129,7 +1111,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) /* * Increment the seqid if the LOCK/LOCKU succeeded, or * failed with a seqid incrementing error - - * see comments nfs_fs.h:seqid_mutating_error() + * see comments nfs4.h:seqid_mutating_error() */ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) { @@ -1158,9 +1140,9 @@ static int nfs4_run_state_manager(void *); static void nfs4_clear_state_manager_bit(struct nfs_client *clp) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); rpc_wake_up(&clp->cl_rpcwaitq); } @@ -1185,7 +1167,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) snprintf(buf, sizeof(buf), "%s-manager", rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); rcu_read_unlock(); - task = kthread_run(nfs4_run_state_manager, clp, buf); + task = kthread_run(nfs4_run_state_manager, clp, "%s", buf); if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); @@ -1210,20 +1192,74 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); +/** + * nfs4_schedule_migration_recovery - trigger migration recovery + * + * @server: FSID that is migrating + * + * Returns zero if recovery has started, otherwise a negative NFS4ERR + * value is returned. + */ +int nfs4_schedule_migration_recovery(const struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + + if (server->fh_expire_type != NFS4_FH_PERSISTENT) { + pr_err("NFS: volatile file handles not supported (server %s)\n", + clp->cl_hostname); + return -NFS4ERR_IO; + } + + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -NFS4ERR_IO; + + dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n", + __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + + set_bit(NFS_MIG_IN_TRANSITION, + &((struct nfs_server *)server)->mig_status); + set_bit(NFS4CLNT_MOVED, &clp->cl_state); + + nfs4_schedule_state_manager(clp); + return 0; +} +EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery); + +/** + * nfs4_schedule_lease_moved_recovery - start lease-moved recovery + * + * @clp: server to check for moved leases + * + */ +void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp) +{ + dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n", + __func__, clp->cl_clientid, clp->cl_hostname); + + set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state); + nfs4_schedule_state_manager(clp); +} +EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery); + int nfs4_wait_clnt_recover(struct nfs_client *clp) { int res; might_sleep(); + atomic_inc(&clp->cl_count); res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) - return res; - + goto out; if (clp->cl_cons_state < 0) - return clp->cl_cons_state; - return 0; + res = clp->cl_cons_state; +out: + nfs_put_client(clp); + return res; } int nfs4_client_recover_expired_lease(struct nfs_client *clp) @@ -1280,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st return 1; } -static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) +int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); @@ -1289,14 +1325,17 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s return 1; } -void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) +int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = server->nfs_client; + if (!nfs4_valid_open_stateid(state)) + return -EBADF; nfs4_state_mark_reclaim_nograce(clp, state); dprintk("%s: scheduling stateid recovery for server %s\n", __func__, clp->cl_hostname); nfs4_schedule_state_manager(clp); + return 0; } EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); @@ -1326,6 +1365,27 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + set_bit(NFS_CONTEXT_BAD, &ctx->flags); + } + spin_unlock(&inode->i_lock); +} + +static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) +{ + set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); + nfs4_state_mark_open_context_bad(state); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1340,13 +1400,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); /* Protect inode->i_flock using the BKL */ - lock_flocks(); + spin_lock(&inode->i_lock); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file)->state != state) continue; - unlock_flocks(); + spin_unlock(&inode->i_lock); status = ops->recover_lock(state, fl); switch (status) { case 0: @@ -1364,8 +1424,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d\n", + __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1373,9 +1433,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ /* kill_proc(fl->fl_pid, SIGLOST, 1); */ status = 0; } - lock_flocks(); + spin_lock(&inode->i_lock); } - unlock_flocks(); + spin_unlock(&inode->i_lock); out: up_write(&nfsi->rwsem); return status; @@ -1395,11 +1455,14 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * recovering after a network partition or a reboot from a * server that doesn't support a grace period. */ -restart: spin_lock(&sp->so_lock); + raw_write_seqcount_begin(&sp->so_reclaim_seqcount); +restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (state->state == 0) continue; atomic_inc(&state->count); @@ -1408,34 +1471,33 @@ restart: if (status >= 0) { status = nfs4_reclaim_locks(state, ops); if (status >= 0) { - spin_lock(&state->state_lock); - list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) - pr_warn_ratelimited("NFS: " - "%s: Lock reclaim " - "failed!\n", __func__); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { + spin_lock(&state->state_lock); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) + pr_warn_ratelimited("NFS: " + "%s: Lock reclaim " + "failed!\n", __func__); + } + spin_unlock(&state->state_lock); } - spin_unlock(&state->state_lock); nfs4_put_open_state(state); + spin_lock(&sp->so_lock); goto restart; } } switch (status) { default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d\n", + __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: - /* - * Open state on this file cannot be recovered - * All we can do is revert to using the zero stateid. - */ - memset(&state->stateid, 0, - sizeof(state->stateid)); - /* Mark the file as being 'closed' */ - state->state = 0; + /* Open state on this file cannot be recovered */ + nfs4_state_mark_recovery_failed(state, status); break; + case -EAGAIN: + ssleep(1); case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_BAD_STATEID: @@ -1454,12 +1516,17 @@ restart: goto out_err; } nfs4_put_open_state(state); + spin_lock(&sp->so_lock); goto restart; } + raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); return 0; out_err: nfs4_put_open_state(state); + spin_lock(&sp->so_lock); + raw_write_seqcount_end(&sp->so_reclaim_seqcount); + spin_unlock(&sp->so_lock); return status; } @@ -1522,11 +1589,12 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) } static void nfs4_reclaim_complete(struct nfs_client *clp, - const struct nfs4_state_recovery_ops *ops) + const struct nfs4_state_recovery_ops *ops, + struct rpc_cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) - (void)ops->reclaim_complete(clp); + (void)ops->reclaim_complete(clp, cred); } static void nfs4_clear_reclaim_server(struct nfs_server *server) @@ -1571,9 +1639,15 @@ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { + const struct nfs4_state_recovery_ops *ops; + struct rpc_cred *cred; + if (!nfs4_state_clear_reclaim_reboot(clp)) return; - nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); + ops = clp->cl_mvops->reboot_recovery_ops; + cred = nfs4_get_clid_cred(clp); + nfs4_reclaim_complete(clp, ops, cred); + put_rpccred(cred); } static void nfs_delegation_clear_all(struct nfs_client *clp) @@ -1600,7 +1674,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) nfs4_state_end_reclaim_reboot(clp); break; case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); @@ -1685,13 +1758,17 @@ static int nfs4_check_lease(struct nfs_client *clp) cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - cred = nfs4_get_setclientid_cred(clp); + cred = nfs4_get_clid_cred(clp); status = -ENOKEY; if (cred == NULL) goto out; } status = ops->renew_lease(clp, cred); put_rpccred(cred); + if (status == -ETIMEDOUT) { + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + return 0; + } out: return nfs4_recovery_handle_error(clp, status); } @@ -1721,10 +1798,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); return -EPERM; case -EACCES: - if (clp->cl_machine_cred == NULL) - return -EACCES; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: case -ETIMEDOUT: case -EAGAIN: @@ -1757,7 +1830,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; - cred = ops->get_clid_cred(clp); + cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; status = ops->establish_clid(clp, cred); @@ -1801,6 +1874,168 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } +/* + * Try remote migration of one FSID from a source server to a + * destination server. The source server provides a list of + * potential destinations. + * + * Returns zero or a negative NFS4ERR status code. + */ +static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_fs_locations *locations = NULL; + struct inode *inode; + struct page *page; + int status, result; + + dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + + result = 0; + page = alloc_page(GFP_KERNEL); + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (page == NULL || locations == NULL) { + dprintk("<-- %s: no memory\n", __func__); + goto out; + } + + inode = server->super->s_root->d_inode; + result = nfs4_proc_get_locations(inode, locations, page, cred); + if (result) { + dprintk("<-- %s: failed to retrieve fs_locations: %d\n", + __func__, result); + goto out; + } + + result = -NFS4ERR_NXIO; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { + dprintk("<-- %s: No fs_locations data, migration skipped\n", + __func__); + goto out; + } + + nfs4_begin_drain_session(clp); + + status = nfs4_replace_transport(server, locations); + if (status != 0) { + dprintk("<-- %s: failed to replace transport: %d\n", + __func__, status); + goto out; + } + + result = 0; + dprintk("<-- %s: migration succeeded\n", __func__); + +out: + if (page != NULL) + __free_page(page); + kfree(locations); + if (result) { + pr_err("NFS: migration recovery failed (server %s)\n", + clp->cl_hostname); + set_bit(NFS_MIG_FAILED, &server->mig_status); + } + return result; +} + +/* + * Returns zero or a negative NFS4ERR status code. + */ +static int nfs4_handle_migration(struct nfs_client *clp) +{ + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + struct nfs_server *server; + struct rpc_cred *cred; + + dprintk("%s: migration reported on \"%s\"\n", __func__, + clp->cl_hostname); + + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) + return -NFS4ERR_NOENT; + + clp->cl_mig_gen++; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + int status; + + if (server->mig_gen == clp->cl_mig_gen) + continue; + server->mig_gen = clp->cl_mig_gen; + + if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION, + &server->mig_status)) + continue; + + rcu_read_unlock(); + status = nfs4_try_migration(server, cred); + if (status < 0) { + put_rpccred(cred); + return status; + } + goto restart; + } + rcu_read_unlock(); + put_rpccred(cred); + return 0; +} + +/* + * Test each nfs_server on the clp's cl_superblocks list to see + * if it's moved to another server. Stop when the server no longer + * returns NFS4ERR_LEASE_MOVED. + */ +static int nfs4_handle_lease_moved(struct nfs_client *clp) +{ + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + struct nfs_server *server; + struct rpc_cred *cred; + + dprintk("%s: lease moved reported on \"%s\"\n", __func__, + clp->cl_hostname); + + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) + return -NFS4ERR_NOENT; + + clp->cl_mig_gen++; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + struct inode *inode; + int status; + + if (server->mig_gen == clp->cl_mig_gen) + continue; + server->mig_gen = clp->cl_mig_gen; + + rcu_read_unlock(); + + inode = server->super->s_root->d_inode; + status = nfs4_proc_fsid_present(inode, cred); + if (status != -NFS4ERR_MOVED) + goto restart; /* wasn't this one */ + if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED) + goto restart; /* there are more */ + goto out; + } + rcu_read_unlock(); + +out: + put_rpccred(cred); + return 0; +} + /** * nfs4_discover_server_trunking - Detect server IP address trunking * @@ -1819,32 +2054,19 @@ int nfs4_discover_server_trunking(struct nfs_client *clp, { const struct nfs4_state_recovery_ops *ops = clp->cl_mvops->reboot_recovery_ops; - rpc_authflavor_t *flavors, flav, save; struct rpc_clnt *clnt; struct rpc_cred *cred; - int i, len, status; + int i, status; dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); - len = NFS_MAX_SECFLAVORS; - flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); - if (flavors == NULL) { - status = -ENOMEM; - goto out; - } - len = rpcauth_list_flavors(flavors, len); - if (len < 0) { - status = len; - goto out_free; - } clnt = clp->cl_rpcclient; - save = clnt->cl_auth->au_flavor; i = 0; mutex_lock(&nfs_clid_init_mutex); - status = -ENOENT; again: - cred = ops->get_clid_cred(clp); + status = -ENOENT; + cred = nfs4_get_clid_cred(clp); if (cred == NULL) goto out_unlock; @@ -1853,35 +2075,42 @@ again: switch (status) { case 0: break; - - case -EACCES: - if (clp->cl_machine_cred == NULL) + case -ETIMEDOUT: + if (clnt->cl_softrtry) break; - /* Handle case where the user hasn't set up machine creds */ - nfs4_clear_machine_cred(clp); case -NFS4ERR_DELAY: - case -ETIMEDOUT: case -EAGAIN: ssleep(1); + case -NFS4ERR_STALE_CLIENTID: dprintk("NFS: %s after status %d, retrying\n", __func__, status); goto again; - + case -EACCES: + if (i++ == 0) { + nfs4_root_machine_cred(clp); + goto again; + } + if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) + break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - status = -EPERM; - if (i >= len) + /* No point in retrying if we already used RPC_AUTH_UNIX */ + if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) { + status = -EPERM; break; - - flav = flavors[i++]; - if (flav == save) - flav = flavors[i++]; - clnt = rpc_clone_client_set_auth(clnt, flav); + } + clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); break; } - clp->cl_rpcclient = clnt; + /* Note: this is safe because we haven't yet marked the + * client as ready, so we are the only user of + * clp->cl_rpcclient + */ + clnt = xchg(&clp->cl_rpcclient, clnt); + rpc_shutdown_client(clnt); + clnt = clp->cl_rpcclient; goto again; case -NFS4ERR_MINOR_VERS_MISMATCH: @@ -1892,13 +2121,15 @@ again: case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery * in nfs4_exchange_id */ status = -EKEYEXPIRED; + break; + default: + pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n", + __func__, status); + status = -EIO; } out_unlock: mutex_unlock(&nfs_clid_init_mutex); -out_free: - kfree(flavors); -out: dprintk("NFS: %s: status = %d\n", __func__, status); return status; } @@ -2000,9 +2231,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs41_handle_server_reboot(clp); if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | - SEQ4_STATUS_ADMIN_STATE_REVOKED | - SEQ4_STATUS_LEASE_MOVED)) + SEQ4_STATUS_ADMIN_STATE_REVOKED)) nfs41_handle_state_revoked(clp); + if (flags & SEQ4_STATUS_LEASE_MOVED) + nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) @@ -2020,10 +2252,20 @@ static int nfs4_reset_session(struct nfs_client *clp) if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); status = nfs4_proc_destroy_session(clp->cl_session, cred); - if (status && status != -NFS4ERR_BADSESSION && - status != -NFS4ERR_DEADSESSION) { + switch (status) { + case 0: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + break; + case -NFS4ERR_BACK_CHAN_BUSY: + case -NFS4ERR_DELAY: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + status = 0; + ssleep(1); + goto out; + default: status = nfs4_recovery_handle_error(clp, status); goto out; } @@ -2053,7 +2295,7 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) if (!nfs4_has_session(clp)) return 0; nfs4_begin_drain_session(clp); - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_bind_conn_to_session(clp, cred); if (cred) put_rpccred(cred); @@ -2074,7 +2316,6 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) } #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } static int nfs4_bind_conn_to_session(struct nfs_client *clp) { @@ -2131,7 +2372,20 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_check_lease(clp); if (status < 0) goto out_error; - continue; + } + + if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { + section = "migration"; + status = nfs4_handle_migration(clp); + if (status < 0) + goto out_error; + } + + if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) { + section = "lease moved"; + status = nfs4_handle_lease_moved(clp); + if (status < 0) + goto out_error; } /* First recover reboot state... */ diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 84d2e9e2f31..6f340f02f2b 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -9,6 +9,7 @@ #include "delegation.h" #include "internal.h" #include "nfs4_fs.h" +#include "dns_resolve.h" #include "pnfs.h" #include "nfs.h" @@ -28,7 +29,7 @@ static struct file_system_type nfs4_remote_fs_type = { .name = "nfs4", .mount = nfs4_remote_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; static struct file_system_type nfs4_remote_referral_fs_type = { @@ -36,7 +37,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = { .name = "nfs4", .mount = nfs4_remote_referral_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; struct file_system_type nfs4_referral_fs_type = { @@ -44,7 +45,7 @@ struct file_system_type nfs4_referral_fs_type = { .name = "nfs4", .mount = nfs4_referral_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; static const struct super_operations nfs4_sops = { @@ -76,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret = nfs_write_inode(inode, wbc); - if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { - int status; - bool sync = true; - - if (wbc->sync_mode == WB_SYNC_NONE) - sync = false; - - status = pnfs_layoutcommit_inode(inode, sync); - if (status < 0) - return status; - } + if (ret == 0) + ret = pnfs_layoutcommit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL); return ret; } @@ -97,7 +90,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) */ static void nfs4_evict_inode(struct inode *inode) { - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); pnfs_return_layout(inode); pnfs_destroy_layout(NFS_I(inode)); @@ -260,9 +253,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, res = nfs_follow_remote_path(root_mnt, export_path); - dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); + dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", + PTR_ERR_OR_ZERO(res), + IS_ERR(res) ? " [error]" : ""); return res; } @@ -318,9 +311,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); + dprintk("<-- nfs4_referral_mount() = %d%s\n", + PTR_ERR_OR_ZERO(res), + IS_ERR(res) ? " [error]" : ""); return res; } @@ -329,18 +322,24 @@ static int __init init_nfs_v4(void) { int err; - err = nfs_idmap_init(); + err = nfs_dns_resolver_init(); if (err) goto out; - err = nfs4_register_sysctl(); + err = nfs_idmap_init(); if (err) goto out1; + err = nfs4_register_sysctl(); + if (err) + goto out2; + register_nfs_version(&nfs_v4); return 0; -out1: +out2: nfs_idmap_quit(); +out1: + nfs_dns_resolver_destroy(); out: return err; } @@ -350,6 +349,7 @@ static void __exit exit_nfs_v4(void) unregister_nfs_version(&nfs_v4); nfs4_unregister_sysctl(); nfs_idmap_quit(); + nfs_dns_resolver_destroy(); } MODULE_LICENSE("GPL"); diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c index 2628d921b7e..b6ebe7e445f 100644 --- a/fs/nfs/nfs4sysctl.c +++ b/fs/nfs/nfs4sysctl.c @@ -16,7 +16,7 @@ static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs4_callback_sysctl_table; -static ctl_table nfs4_cb_sysctls[] = { +static struct ctl_table nfs4_cb_sysctls[] = { { .procname = "nfs_callback_tcpport", .data = &nfs_callback_set_tcpport, @@ -36,7 +36,7 @@ static ctl_table nfs4_cb_sysctls[] = { { } }; -static ctl_table nfs4_cb_sysctl_dir[] = { +static struct ctl_table nfs4_cb_sysctl_dir[] = { { .procname = "nfs", .mode = 0555, @@ -45,7 +45,7 @@ static ctl_table nfs4_cb_sysctl_dir[] = { { } }; -static ctl_table nfs4_cb_sysctl_root[] = { +static struct ctl_table nfs4_cb_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c new file mode 100644 index 00000000000..d774335cc8b --- /dev/null +++ b/fs/nfs/nfs4trace.c @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#include <linux/nfs_fs.h> +#include "nfs4_fs.h" +#include "internal.h" +#include "nfs4session.h" +#include "callback.h" + +#define CREATE_TRACE_POINTS +#include "nfs4trace.h" + +#ifdef CONFIG_NFS_V4_1 +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds); +#endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h new file mode 100644 index 00000000000..0a744f3a86f --- /dev/null +++ b/fs/nfs/nfs4trace.h @@ -0,0 +1,1148 @@ +/* + * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs4 + +#if !defined(_TRACE_NFS4_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS4_H + +#include <linux/tracepoint.h> + +#define show_nfsv4_errors(error) \ + __print_symbolic(error, \ + { NFS4_OK, "OK" }, \ + /* Mapped by nfs4_stat_to_errno() */ \ + { -EPERM, "EPERM" }, \ + { -ENOENT, "ENOENT" }, \ + { -EIO, "EIO" }, \ + { -ENXIO, "ENXIO" }, \ + { -EACCES, "EACCES" }, \ + { -EEXIST, "EEXIST" }, \ + { -EXDEV, "EXDEV" }, \ + { -ENOTDIR, "ENOTDIR" }, \ + { -EISDIR, "EISDIR" }, \ + { -EFBIG, "EFBIG" }, \ + { -ENOSPC, "ENOSPC" }, \ + { -EROFS, "EROFS" }, \ + { -EMLINK, "EMLINK" }, \ + { -ENAMETOOLONG, "ENAMETOOLONG" }, \ + { -ENOTEMPTY, "ENOTEMPTY" }, \ + { -EDQUOT, "EDQUOT" }, \ + { -ESTALE, "ESTALE" }, \ + { -EBADHANDLE, "EBADHANDLE" }, \ + { -EBADCOOKIE, "EBADCOOKIE" }, \ + { -ENOTSUPP, "ENOTSUPP" }, \ + { -ETOOSMALL, "ETOOSMALL" }, \ + { -EREMOTEIO, "EREMOTEIO" }, \ + { -EBADTYPE, "EBADTYPE" }, \ + { -EAGAIN, "EAGAIN" }, \ + { -ELOOP, "ELOOP" }, \ + { -EOPNOTSUPP, "EOPNOTSUPP" }, \ + { -EDEADLK, "EDEADLK" }, \ + /* RPC errors */ \ + { -ENOMEM, "ENOMEM" }, \ + { -EKEYEXPIRED, "EKEYEXPIRED" }, \ + { -ETIMEDOUT, "ETIMEDOUT" }, \ + { -ERESTARTSYS, "ERESTARTSYS" }, \ + { -ECONNREFUSED, "ECONNREFUSED" }, \ + { -ECONNRESET, "ECONNRESET" }, \ + { -ENETUNREACH, "ENETUNREACH" }, \ + { -EHOSTUNREACH, "EHOSTUNREACH" }, \ + { -EHOSTDOWN, "EHOSTDOWN" }, \ + { -EPIPE, "EPIPE" }, \ + { -EPFNOSUPPORT, "EPFNOSUPPORT" }, \ + { -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \ + /* NFSv4 native errors */ \ + { -NFS4ERR_ACCESS, "ACCESS" }, \ + { -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \ + { -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \ + { -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \ + { -NFS4ERR_BADCHAR, "BADCHAR" }, \ + { -NFS4ERR_BADHANDLE, "BADHANDLE" }, \ + { -NFS4ERR_BADIOMODE, "BADIOMODE" }, \ + { -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \ + { -NFS4ERR_BADLABEL, "BADLABEL" }, \ + { -NFS4ERR_BADNAME, "BADNAME" }, \ + { -NFS4ERR_BADOWNER, "BADOWNER" }, \ + { -NFS4ERR_BADSESSION, "BADSESSION" }, \ + { -NFS4ERR_BADSLOT, "BADSLOT" }, \ + { -NFS4ERR_BADTYPE, "BADTYPE" }, \ + { -NFS4ERR_BADXDR, "BADXDR" }, \ + { -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \ + { -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \ + { -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \ + { -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \ + { -NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \ + { -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \ + { -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \ + { -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \ + { -NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \ + { -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \ + "CONN_NOT_BOUND_TO_SESSION" }, \ + { -NFS4ERR_DEADLOCK, "DEADLOCK" }, \ + { -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \ + { -NFS4ERR_DELAY, "DELAY" }, \ + { -NFS4ERR_DELEG_ALREADY_WANTED, \ + "DELEG_ALREADY_WANTED" }, \ + { -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \ + { -NFS4ERR_DENIED, "DENIED" }, \ + { -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \ + { -NFS4ERR_DQUOT, "DQUOT" }, \ + { -NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \ + { -NFS4ERR_EXIST, "EXIST" }, \ + { -NFS4ERR_EXPIRED, "EXPIRED" }, \ + { -NFS4ERR_FBIG, "FBIG" }, \ + { -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \ + { -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \ + { -NFS4ERR_GRACE, "GRACE" }, \ + { -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \ + { -NFS4ERR_INVAL, "INVAL" }, \ + { -NFS4ERR_IO, "IO" }, \ + { -NFS4ERR_ISDIR, "ISDIR" }, \ + { -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \ + { -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \ + { -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \ + { -NFS4ERR_LOCKED, "LOCKED" }, \ + { -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \ + { -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \ + { -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \ + { -NFS4ERR_MLINK, "MLINK" }, \ + { -NFS4ERR_MOVED, "MOVED" }, \ + { -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \ + { -NFS4ERR_NOENT, "NOENT" }, \ + { -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \ + { -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \ + { -NFS4ERR_NOSPC, "NOSPC" }, \ + { -NFS4ERR_NOTDIR, "NOTDIR" }, \ + { -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \ + { -NFS4ERR_NOTSUPP, "NOTSUPP" }, \ + { -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \ + { -NFS4ERR_NOT_SAME, "NOT_SAME" }, \ + { -NFS4ERR_NO_GRACE, "NO_GRACE" }, \ + { -NFS4ERR_NXIO, "NXIO" }, \ + { -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \ + { -NFS4ERR_OPENMODE, "OPENMODE" }, \ + { -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \ + { -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \ + { -NFS4ERR_PERM, "PERM" }, \ + { -NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \ + { -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \ + { -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \ + { -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \ + { -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \ + { -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \ + { -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \ + { -NFS4ERR_REP_TOO_BIG_TO_CACHE, \ + "REP_TOO_BIG_TO_CACHE" }, \ + { -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \ + { -NFS4ERR_RESOURCE, "RESOURCE" }, \ + { -NFS4ERR_RESTOREFH, "RESTOREFH" }, \ + { -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \ + { -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \ + { -NFS4ERR_ROFS, "ROFS" }, \ + { -NFS4ERR_SAME, "SAME" }, \ + { -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \ + { -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \ + { -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \ + { -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \ + { -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \ + { -NFS4ERR_STALE, "STALE" }, \ + { -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \ + { -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \ + { -NFS4ERR_SYMLINK, "SYMLINK" }, \ + { -NFS4ERR_TOOSMALL, "TOOSMALL" }, \ + { -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \ + { -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \ + { -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \ + { -NFS4ERR_WRONGSEC, "WRONGSEC" }, \ + { -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ + { -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ + { -NFS4ERR_XDEV, "XDEV" }) + +#define show_open_flags(flags) \ + __print_flags(flags, "|", \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { O_DIRECT, "O_DIRECT" }) + +#define show_fmode_flags(mode) \ + __print_flags(mode, "|", \ + { ((__force unsigned long)FMODE_READ), "READ" }, \ + { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ + { ((__force unsigned long)FMODE_EXEC), "EXEC" }) + +#define show_nfs_fattr_flags(valid) \ + __print_flags((unsigned long)valid, "|", \ + { NFS_ATTR_FATTR_TYPE, "TYPE" }, \ + { NFS_ATTR_FATTR_MODE, "MODE" }, \ + { NFS_ATTR_FATTR_NLINK, "NLINK" }, \ + { NFS_ATTR_FATTR_OWNER, "OWNER" }, \ + { NFS_ATTR_FATTR_GROUP, "GROUP" }, \ + { NFS_ATTR_FATTR_RDEV, "RDEV" }, \ + { NFS_ATTR_FATTR_SIZE, "SIZE" }, \ + { NFS_ATTR_FATTR_FSID, "FSID" }, \ + { NFS_ATTR_FATTR_FILEID, "FILEID" }, \ + { NFS_ATTR_FATTR_ATIME, "ATIME" }, \ + { NFS_ATTR_FATTR_MTIME, "MTIME" }, \ + { NFS_ATTR_FATTR_CTIME, "CTIME" }, \ + { NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \ + { NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \ + { NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" }) + +DECLARE_EVENT_CLASS(nfs4_clientid_event, + TP_PROTO( + const struct nfs_client *clp, + int error + ), + + TP_ARGS(clp, error), + + TP_STRUCT__entry( + __string(dstaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)) + __field(int, error) + ), + + TP_fast_assign( + __entry->error = error; + __assign_str(dstaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + ), + + TP_printk( + "error=%d (%s) dstaddr=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + __get_str(dstaddr) + ) +); +#define DEFINE_NFS4_CLIENTID_EVENT(name) \ + DEFINE_EVENT(nfs4_clientid_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp, \ + int error \ + ), \ + TP_ARGS(clp, error)) +DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid_confirm); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew_async); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_CLIENTID_EVENT(nfs4_exchange_id); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_create_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_clientid); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); +DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); + +TRACE_EVENT(nfs4_setup_sequence, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_args *args + ), + TP_ARGS(session, args), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_used_slotid) + ), + + TP_fast_assign( + const struct nfs4_slot *sa_slot = args->sa_slot; + __entry->session = nfs_session_id_hash(&session->sess_id); + __entry->slot_nr = sa_slot->slot_nr; + __entry->seq_nr = sa_slot->seq_nr; + __entry->highest_used_slotid = + sa_slot->table->highest_used_slotid; + ), + TP_printk( + "session=0x%08x slot_nr=%u seq_nr=%u " + "highest_used_slotid=%u", + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_used_slotid + ) +); + +#define show_nfs4_sequence_status_flags(status) \ + __print_flags((unsigned long)status, "|", \ + { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \ + "CB_GSS_CONTEXTS_EXPIRING" }, \ + { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \ + "CB_GSS_CONTEXTS_EXPIRED" }, \ + { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \ + "EXPIRED_ALL_STATE_REVOKED" }, \ + { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \ + "EXPIRED_SOME_STATE_REVOKED" }, \ + { SEQ4_STATUS_ADMIN_STATE_REVOKED, \ + "ADMIN_STATE_REVOKED" }, \ + { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \ + "RECALLABLE_STATE_REVOKED" }, \ + { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \ + { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \ + "RESTART_RECLAIM_NEEDED" }, \ + { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \ + "CB_PATH_DOWN_SESSION" }, \ + { SEQ4_STATUS_BACKCHANNEL_FAULT, \ + "BACKCHANNEL_FAULT" }) + +TRACE_EVENT(nfs4_sequence_done, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_res *res + ), + TP_ARGS(session, res), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, target_highest_slotid) + __field(unsigned int, status_flags) + __field(int, error) + ), + + TP_fast_assign( + const struct nfs4_slot *sr_slot = res->sr_slot; + __entry->session = nfs_session_id_hash(&session->sess_id); + __entry->slot_nr = sr_slot->slot_nr; + __entry->seq_nr = sr_slot->seq_nr; + __entry->highest_slotid = res->sr_highest_slotid; + __entry->target_highest_slotid = + res->sr_target_highest_slotid; + __entry->error = res->sr_status; + ), + TP_printk( + "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u target_highest_slotid=%u " + "status_flags=%u (%s)", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid, + __entry->target_highest_slotid, + __entry->status_flags, + show_nfs4_sequence_status_flags(__entry->status_flags) + ) +); + +struct cb_sequenceargs; +struct cb_sequenceres; + +TRACE_EVENT(nfs4_cb_sequence, + TP_PROTO( + const struct cb_sequenceargs *args, + const struct cb_sequenceres *res, + __be32 status + ), + TP_ARGS(args, res, status), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, cachethis) + __field(int, error) + ), + + TP_fast_assign( + __entry->session = nfs_session_id_hash(&args->csa_sessionid); + __entry->slot_nr = args->csa_slotid; + __entry->seq_nr = args->csa_sequenceid; + __entry->highest_slotid = args->csa_highestslotid; + __entry->cachethis = args->csa_cachethis; + __entry->error = -be32_to_cpu(status); + ), + + TP_printk( + "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid + ) +); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_open_event, + TP_PROTO( + const struct nfs_open_context *ctx, + int flags, + int error + ), + + TP_ARGS(ctx, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + const struct nfs4_state *state = ctx->state; + const struct inode *inode = NULL; + + __entry->error = error; + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __entry->dev = ctx->dentry->d_sb->s_dev; + if (!IS_ERR(state)) + inode = state->inode; + if (inode != NULL) { + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + } else { + __entry->fileid = 0; + __entry->fhandle = 0; + } + __entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode); + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "error=%d (%s) flags=%d (%s) fmode=%s " + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "name=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS4_OPEN_EVENT(name) \ + DEFINE_EVENT(nfs4_open_event, name, \ + TP_PROTO( \ + const struct nfs_open_context *ctx, \ + int flags, \ + int error \ + ), \ + TP_ARGS(ctx, flags, error)) +DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim); +DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired); +DEFINE_NFS4_OPEN_EVENT(nfs4_open_file); + +TRACE_EVENT(nfs4_close, + TP_PROTO( + const struct nfs4_state *state, + const struct nfs_closeargs *args, + const struct nfs_closeres *res, + int error + ), + + TP_ARGS(state, args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, fmode) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fmode = (__force unsigned int)state->state; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " + "fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->fmode ? show_fmode_flags(__entry->fmode) : + "closed", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define show_lock_cmd(type) \ + __print_symbolic((int)type, \ + { F_GETLK, "GETLK" }, \ + { F_SETLK, "SETLK" }, \ + { F_SETLKW, "SETLKW" }) +#define show_lock_type(type) \ + __print_symbolic((int)type, \ + { F_RDLCK, "RDLCK" }, \ + { F_WRLCK, "WRLCK" }, \ + { F_UNLCK, "UNLCK" }) + +DECLARE_EVENT_CLASS(nfs4_lock_event, + TP_PROTO( + const struct file_lock *request, + const struct nfs4_state *state, + int cmd, + int error + ), + + TP_ARGS(request, state, cmd, error), + + TP_STRUCT__entry( + __field(int, error) + __field(int, cmd) + __field(char, type) + __field(loff_t, start) + __field(loff_t, end) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->error = error; + __entry->cmd = cmd; + __entry->type = request->fl_type; + __entry->start = request->fl_start; + __entry->end = request->fl_end; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + ), + + TP_printk( + "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + show_lock_cmd(__entry->cmd), + show_lock_type(__entry->type), + (long long)__entry->start, + (long long)__entry->end, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define DEFINE_NFS4_LOCK_EVENT(name) \ + DEFINE_EVENT(nfs4_lock_event, name, \ + TP_PROTO( \ + const struct file_lock *request, \ + const struct nfs4_state *state, \ + int cmd, \ + int error \ + ), \ + TP_ARGS(request, state, cmd, error)) +DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock); +DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock); +DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim); +DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired); +DEFINE_NFS4_LOCK_EVENT(nfs4_unlock); + +DECLARE_EVENT_CLASS(nfs4_set_delegation_event, + TP_PROTO( + const struct inode *inode, + fmode_t fmode + ), + + TP_ARGS(inode, fmode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, fmode) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fmode = (__force unsigned int)fmode; + ), + + TP_printk( + "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x", + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); +#define DEFINE_NFS4_SET_DELEGATION_EVENT(name) \ + DEFINE_EVENT(nfs4_set_delegation_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + fmode_t fmode \ + ), \ + TP_ARGS(inode, fmode)) +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation); +DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation); + +TRACE_EVENT(nfs4_delegreturn_exit, + TP_PROTO( + const struct nfs4_delegreturnargs *args, + const struct nfs4_delegreturnres *res, + int error + ), + + TP_ARGS(args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = res->server->s_dev; + __entry->fhandle = nfs_fhandle_hash(args->fhandle); + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) dev=%02x:%02x fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fhandle + ) +); + +#ifdef CONFIG_NFS_V4_1 +DECLARE_EVENT_CLASS(nfs4_test_stateid_event, + TP_PROTO( + const struct nfs4_state *state, + const struct nfs4_lock_state *lsp, + int error + ), + + TP_ARGS(state, lsp, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + ), + + TP_fast_assign( + const struct inode *inode = state->inode; + + __entry->error = error; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define DEFINE_NFS4_TEST_STATEID_EVENT(name) \ + DEFINE_EVENT(nfs4_test_stateid_event, name, \ + TP_PROTO( \ + const struct nfs4_state *state, \ + const struct nfs4_lock_state *lsp, \ + int error \ + ), \ + TP_ARGS(state, lsp, error)) +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_delegation_stateid); +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_open_stateid); +DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_lock_stateid); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_lookup_event, + TP_PROTO( + const struct inode *dir, + const struct qstr *name, + int error + ), + + TP_ARGS(dir, name, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, dir) + __string(name, name->name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, name->name); + ), + + TP_printk( + "error=%d (%s) name=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS4_LOOKUP_EVENT(name) \ + DEFINE_EVENT(nfs4_lookup_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct qstr *name, \ + int error \ + ), \ + TP_ARGS(dir, name, error)) + +DEFINE_NFS4_LOOKUP_EVENT(nfs4_lookup); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_symlink); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_mkdir); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_mknod); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations); +DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo); + +TRACE_EVENT(nfs4_rename, + TP_PROTO( + const struct inode *olddir, + const struct qstr *oldname, + const struct inode *newdir, + const struct qstr *newname, + int error + ), + + TP_ARGS(olddir, oldname, newdir, newname, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, olddir) + __string(oldname, oldname->name) + __field(u64, newdir) + __string(newname, newname->name) + ), + + TP_fast_assign( + __entry->dev = olddir->i_sb->s_dev; + __entry->olddir = NFS_FILEID(olddir); + __entry->newdir = NFS_FILEID(newdir); + __entry->error = error; + __assign_str(oldname, oldname->name); + __assign_str(newname, newname->name); + ), + + TP_printk( + "error=%d (%s) oldname=%02x:%02x:%llu/%s " + "newname=%02x:%02x:%llu/%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->olddir, + __get_str(oldname), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->newdir, + __get_str(newname) + ) +); + +DECLARE_EVENT_CLASS(nfs4_inode_event, + TP_PROTO( + const struct inode *inode, + int error + ), + + TP_ARGS(inode, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle + ) +); + +#define DEFINE_NFS4_INODE_EVENT(name) \ + DEFINE_EVENT(nfs4_inode_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + int error \ + ), \ + TP_ARGS(inode, error)) + +DEFINE_NFS4_INODE_EVENT(nfs4_setattr); +DEFINE_NFS4_INODE_EVENT(nfs4_access); +DEFINE_NFS4_INODE_EVENT(nfs4_readlink); +DEFINE_NFS4_INODE_EVENT(nfs4_readdir); +DEFINE_NFS4_INODE_EVENT(nfs4_get_acl); +DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); +DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); +#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ +DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); +DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn); + +DECLARE_EVENT_CLASS(nfs4_getattr_event, + TP_PROTO( + const struct nfs_server *server, + const struct nfs_fh *fhandle, + const struct nfs_fattr *fattr, + int error + ), + + TP_ARGS(server, fhandle, fattr, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(unsigned int, valid) + __field(int, error) + ), + + TP_fast_assign( + __entry->dev = server->s_dev; + __entry->valid = fattr->valid; + __entry->fhandle = nfs_fhandle_hash(fhandle); + __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "valid=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_nfs_fattr_flags(__entry->valid) + ) +); + +#define DEFINE_NFS4_GETATTR_EVENT(name) \ + DEFINE_EVENT(nfs4_getattr_event, name, \ + TP_PROTO( \ + const struct nfs_server *server, \ + const struct nfs_fh *fhandle, \ + const struct nfs_fattr *fattr, \ + int error \ + ), \ + TP_ARGS(server, fhandle, fattr, error)) +DEFINE_NFS4_GETATTR_EVENT(nfs4_getattr); +DEFINE_NFS4_GETATTR_EVENT(nfs4_lookup_root); +DEFINE_NFS4_GETATTR_EVENT(nfs4_fsinfo); + +DECLARE_EVENT_CLASS(nfs4_idmap_event, + TP_PROTO( + const char *name, + int len, + u32 id, + int error + ), + + TP_ARGS(name, len, id, error), + + TP_STRUCT__entry( + __field(int, error) + __field(u32, id) + __dynamic_array(char, name, len > 0 ? len + 1 : 1) + ), + + TP_fast_assign( + if (len < 0) + len = 0; + __entry->error = error < 0 ? error : 0; + __entry->id = id; + memcpy(__get_dynamic_array(name), name, len); + ((char *)__get_dynamic_array(name))[len] = 0; + ), + + TP_printk( + "error=%d id=%u name=%s", + __entry->error, + __entry->id, + __get_str(name) + ) +); +#define DEFINE_NFS4_IDMAP_EVENT(name) \ + DEFINE_EVENT(nfs4_idmap_event, name, \ + TP_PROTO( \ + const char *name, \ + int len, \ + u32 id, \ + int error \ + ), \ + TP_ARGS(name, len, id, error)) +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_name_to_uid); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name); +DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); + +DECLARE_EVENT_CLASS(nfs4_read_event, + TP_PROTO( + const struct nfs_pgio_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->header->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); +#define DEFINE_NFS4_READ_EVENT(name) \ + DEFINE_EVENT(nfs4_read_event, name, \ + TP_PROTO( \ + const struct nfs_pgio_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) +DEFINE_NFS4_READ_EVENT(nfs4_read); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_write_event, + TP_PROTO( + const struct nfs_pgio_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->header->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); + +#define DEFINE_NFS4_WRITE_EVENT(name) \ + DEFINE_EVENT(nfs4_write_event, name, \ + TP_PROTO( \ + const struct nfs_pgio_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) +DEFINE_NFS4_WRITE_EVENT(nfs4_write); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); +#endif /* CONFIG_NFS_V4_1 */ + +DECLARE_EVENT_CLASS(nfs4_commit_event, + TP_PROTO( + const struct nfs_commit_data *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(size_t, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%zu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, + __entry->count + ) +); +#define DEFINE_NFS4_COMMIT_EVENT(name) \ + DEFINE_EVENT(nfs4_commit_event, name, \ + TP_PROTO( \ + const struct nfs_commit_data *data, \ + int error \ + ), \ + TP_ARGS(data, error)) +DEFINE_NFS4_COMMIT_EVENT(nfs4_commit); +#ifdef CONFIG_NFS_V4_1 +DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds); + +#define show_pnfs_iomode(iomode) \ + __print_symbolic(iomode, \ + { IOMODE_READ, "READ" }, \ + { IOMODE_RW, "RW" }, \ + { IOMODE_ANY, "ANY" }) + +TRACE_EVENT(nfs4_layoutget, + TP_PROTO( + const struct nfs_open_context *ctx, + const struct pnfs_layout_range *args, + const struct pnfs_layout_range *res, + int error + ), + + TP_ARGS(ctx, args, res, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u32, iomode) + __field(u64, offset) + __field(u64, count) + __field(int, error) + ), + + TP_fast_assign( + const struct inode *inode = ctx->dentry->d_inode; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->iomode = args->iomode; + __entry->offset = args->offset; + __entry->count = args->length; + __entry->error = error; + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "iomode=%s offset=%llu count=%llu", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_pnfs_iomode(__entry->iomode), + (unsigned long long)__entry->offset, + (unsigned long long)__entry->count + ) +); + +DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit); +DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn); + +#endif /* CONFIG_NFS_V4_1 */ + +#endif /* _TRACE_NFS4_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE nfs4trace +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 26b14392043..939ae606cfa 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -102,12 +102,19 @@ static int nfs4_stat_to_errno(int); #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#ifdef CONFIG_NFS_V4_SECURITY_LABEL +/* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */ +#define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN)) +#else +#define nfs4_label_maxsz 0 +#endif /* We support only one layout type per file system */ #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) /* This is based on getfattr, which uses the most attributes: */ #define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ 3 + 3 + 3 + nfs4_owner_maxsz + \ - nfs4_group_maxsz + decode_mdsthreshold_maxsz)) + nfs4_group_maxsz + nfs4_label_maxsz + \ + decode_mdsthreshold_maxsz)) #define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ nfs4_fattr_value_maxsz) #define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) @@ -115,6 +122,7 @@ static int nfs4_stat_to_errno(int); 1 + 2 + 1 + \ nfs4_owner_maxsz + \ nfs4_group_maxsz + \ + nfs4_label_maxsz + \ 4 + 4) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) @@ -192,7 +200,8 @@ static int nfs4_stat_to_errno(int); encode_stateid_maxsz + 3) #define decode_read_maxsz (op_decode_hdr_maxsz + 2) #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ - 2 + encode_verifier_maxsz + 5) + 2 + encode_verifier_maxsz + 5 + \ + nfs4_label_maxsz) #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ decode_verifier_maxsz) #define encode_readlink_maxsz (op_encode_hdr_maxsz) @@ -280,7 +289,9 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ - 0 /* SP4_NONE (for now) */ + \ + /* max is SP4_MACH_CRED (for now) */ + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ 1 /* implementation id array of size 1 */ + \ 1 /* nii_domain */ + \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ @@ -292,7 +303,9 @@ static int nfs4_stat_to_errno(int); 1 /* eir_sequenceid */ + \ 1 /* eir_flags */ + \ 1 /* spr_how */ + \ - 0 /* SP4_NONE (for now) */ + \ + /* max is SP4_MACH_CRED (for now) */ + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ + 1 + NFS4_OP_MAP_NUM_WORDS + \ 2 /* eir_server_owner.so_minor_id */ + \ /* eir_server_owner.so_major_id<> */ \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ @@ -396,7 +409,7 @@ static int nfs4_stat_to_errno(int); #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ XDR_QUADLEN(NFS4_STATEID_SIZE)) -#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -530,14 +543,10 @@ static int nfs4_stat_to_errno(int); decode_setclientid_maxsz) #define NFS4_enc_setclientid_confirm_sz \ (compound_encode_hdr_maxsz + \ - encode_setclientid_confirm_maxsz + \ - encode_putrootfh_maxsz + \ - encode_fsinfo_maxsz) + encode_setclientid_confirm_maxsz) #define NFS4_dec_setclientid_confirm_sz \ (compound_decode_hdr_maxsz + \ - decode_setclientid_confirm_maxsz + \ - decode_putrootfh_maxsz + \ - decode_fsinfo_maxsz) + decode_setclientid_confirm_maxsz) #define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -581,11 +590,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_renew_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_renew_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -722,13 +733,15 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ - encode_fs_locations_maxsz) + encode_fs_locations_maxsz + \ + encode_renew_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ - decode_fs_locations_maxsz) + decode_fs_locations_maxsz + \ + decode_renew_maxsz) #define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -737,6 +750,18 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_secinfo_maxsz) +#define NFS4_enc_fsid_present_sz \ + (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getfh_maxsz + \ + encode_renew_maxsz) +#define NFS4_dec_fsid_present_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getfh_maxsz + \ + decode_renew_maxsz) #if defined(CONFIG_NFS_V4_1) #define NFS4_enc_bind_conn_to_session_sz \ (compound_encode_hdr_maxsz + \ @@ -857,6 +882,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + decode_sequence_maxsz + decode_putfh_maxsz) * XDR_UNIT); + +const u32 nfs41_maxgetdevinfo_overhead = ((RPC_MAX_REPHEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz) * + XDR_UNIT); +EXPORT_SYMBOL_GPL(nfs41_maxgetdevinfo_overhead); #endif /* CONFIG_NFS_V4_1 */ static const umode_t nfs_type2fmt[] = { @@ -972,124 +1003,123 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); } -static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) +static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, + const struct nfs4_label *label, + const struct nfs_server *server) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; int owner_namelen = 0; int owner_grouplen = 0; __be32 *p; - __be32 *q; - int len; - uint32_t bmval0 = 0; - uint32_t bmval1 = 0; + unsigned i; + uint32_t len = 0; + uint32_t bmval_len; + uint32_t bmval[3] = { 0 }; /* * We reserve enough space to write the entire attribute buffer at once. * In the worst-case, this would be - * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) - * = 36 bytes, plus any contribution from variable-length fields + * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) + * = 40 bytes, plus any contribution from variable-length fields * such as owner/group. */ - len = 16; - - /* Sigh */ - if (iap->ia_valid & ATTR_SIZE) + if (iap->ia_valid & ATTR_SIZE) { + bmval[0] |= FATTR4_WORD0_SIZE; len += 8; - if (iap->ia_valid & ATTR_MODE) + } + if (iap->ia_valid & ATTR_MODE) { + bmval[1] |= FATTR4_WORD1_MODE; len += 4; + } if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { dprintk("nfs: couldn't resolve uid %d to string\n", - iap->ia_uid); + from_kuid(&init_user_ns, iap->ia_uid)); /* XXX */ strcpy(owner_name, "nobody"); owner_namelen = sizeof("nobody") - 1; /* goto out; */ } + bmval[1] |= FATTR4_WORD1_OWNER; len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); if (owner_grouplen < 0) { dprintk("nfs: couldn't resolve gid %d to string\n", - iap->ia_gid); + from_kgid(&init_user_ns, iap->ia_gid)); strcpy(owner_group, "nobody"); owner_grouplen = sizeof("nobody") - 1; /* goto out; */ } + bmval[1] |= FATTR4_WORD1_OWNER_GROUP; len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); } - if (iap->ia_valid & ATTR_ATIME_SET) + if (iap->ia_valid & ATTR_ATIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 16; - else if (iap->ia_valid & ATTR_ATIME) + } else if (iap->ia_valid & ATTR_ATIME) { + bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; len += 4; - if (iap->ia_valid & ATTR_MTIME_SET) + } + if (iap->ia_valid & ATTR_MTIME_SET) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 16; - else if (iap->ia_valid & ATTR_MTIME) + } else if (iap->ia_valid & ATTR_MTIME) { + bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET; len += 4; - p = reserve_space(xdr, len); + } + if (label) { + len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); + bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; + } - /* - * We write the bitmap length now, but leave the bitmap and the attribute - * buffer length to be backfilled at the end of this routine. - */ - *p++ = cpu_to_be32(2); - q = p; - p += 3; + if (bmval[2] != 0) + bmval_len = 3; + else if (bmval[1] != 0) + bmval_len = 2; + else + bmval_len = 1; - if (iap->ia_valid & ATTR_SIZE) { - bmval0 |= FATTR4_WORD0_SIZE; + p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len); + + *p++ = cpu_to_be32(bmval_len); + for (i = 0; i < bmval_len; i++) + *p++ = cpu_to_be32(bmval[i]); + *p++ = cpu_to_be32(len); + + if (bmval[0] & FATTR4_WORD0_SIZE) p = xdr_encode_hyper(p, iap->ia_size); - } - if (iap->ia_valid & ATTR_MODE) { - bmval1 |= FATTR4_WORD1_MODE; + if (bmval[1] & FATTR4_WORD1_MODE) *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); - } - if (iap->ia_valid & ATTR_UID) { - bmval1 |= FATTR4_WORD1_OWNER; + if (bmval[1] & FATTR4_WORD1_OWNER) p = xdr_encode_opaque(p, owner_name, owner_namelen); - } - if (iap->ia_valid & ATTR_GID) { - bmval1 |= FATTR4_WORD1_OWNER_GROUP; + if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) p = xdr_encode_opaque(p, owner_group, owner_grouplen); + if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { + if (iap->ia_valid & ATTR_ATIME_SET) { + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec); + *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } - if (iap->ia_valid & ATTR_ATIME_SET) { - bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_atime.tv_sec); - *p++ = cpu_to_be32(iap->ia_atime.tv_nsec); - } - else if (iap->ia_valid & ATTR_ATIME) { - bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); - } - if (iap->ia_valid & ATTR_MTIME_SET) { - bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); - *p++ = cpu_to_be32(0); - *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); - *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); - } - else if (iap->ia_valid & ATTR_MTIME) { - bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; - *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); + if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { + if (iap->ia_valid & ATTR_MTIME_SET) { + *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); + p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec); + *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); + } else + *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME); } - - /* - * Now we backfill the bitmap and the attribute buffer length. - */ - if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", - len, ((char *)p - (char *)q) + 4); - BUG(); + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + *p++ = cpu_to_be32(label->lfs); + *p++ = cpu_to_be32(label->pi); + *p++ = cpu_to_be32(label->len); + p = xdr_encode_opaque_fixed(p, label->label, label->len); } - len = (char *)p - (char *)q - 12; - *q++ = htonl(bmval0); - *q++ = htonl(bmval1); - *q = htonl(len); /* out: */ } @@ -1142,7 +1172,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - encode_attrs(xdr, create->attrs, create->server); + encode_attrs(xdr, create->attrs, create->label, create->server); } static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) @@ -1194,8 +1224,10 @@ encode_getattr_three(struct xdr_stream *xdr, static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], hdr); + encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1], + bitmask[2] & nfs4_fattr_bitmap[2], + hdr); } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, @@ -1366,33 +1398,28 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) { + struct iattr dummy; __be32 *p; - struct nfs_client *clp; p = reserve_space(xdr, 4); - switch(arg->open_flags & O_EXCL) { - case 0: + switch(arg->createmode) { + case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); - encode_attrs(xdr, arg->u.attrs, arg->server); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); break; - default: - clp = arg->server->nfs_client; - if (clp->cl_mvops->minor_version > 0) { - if (nfs4_has_persistent_session(clp)) { - *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->server); - } else { - struct iattr dummy; - - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); - encode_nfs4_verifier(xdr, &arg->u.verifier); - dummy.ia_valid = 0; - encode_attrs(xdr, &dummy, arg->server); - } - } else { - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); - } + case NFS4_CREATE_GUARDED: + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server); + break; + case NFS4_CREATE_EXCLUSIVE: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->label, arg->server); } } @@ -1459,6 +1486,23 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc encode_string(xdr, name->len, name->name); } +static inline void encode_claim_fh(struct xdr_stream *xdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_FH); +} + +static inline void encode_claim_delegate_cur_fh(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEG_CUR_FH); + encode_nfs4_stateid(xdr, stateid); +} + static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); @@ -1474,6 +1518,12 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, case NFS4_OPEN_CLAIM_DELEGATE_CUR: encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); break; + case NFS4_OPEN_CLAIM_FH: + encode_claim_fh(xdr); + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + encode_claim_delegate_cur_fh(xdr, &arg->u.delegation); + break; default: BUG(); } @@ -1506,35 +1556,13 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_open_stateid(struct xdr_stream *xdr, - const struct nfs_open_context *ctx, - const struct nfs_lock_context *l_ctx, - fmode_t fmode, - int zero_seqid) -{ - nfs4_stateid stateid; - - if (ctx->state != NULL) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - nfs4_select_rw_stateid(&stateid, ctx->state, - fmode, lockowner); - if (zero_seqid) - stateid.seqid = 0; - encode_nfs4_stateid(xdr, &stateid); - } else - encode_nfs4_stateid(xdr, &zero_stateid); -} - -static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) +static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_READ, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1543,12 +1571,14 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = { + uint32_t attrs[3] = { FATTR4_WORD0_RDATTR_ERROR, FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count >> 1; __be32 *p, verf[2]; + uint32_t attrlen = 0; + unsigned int i; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1557,29 +1587,36 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; dircount >>= 1; } /* Use mounted_on_fileid only if the server supports it */ if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; + for (i = 0; i < ARRAY_SIZE(attrs); i++) { + attrs[i] &= readdir->bitmask[i]; + if (attrs[i] != 0) + attrlen = i+1; + } encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); encode_uint64(xdr, readdir->cookie); encode_nfs4_verifier(xdr, &readdir->verifier); - p = reserve_space(xdr, 20); + p = reserve_space(xdr, 12 + (attrlen << 2)); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); - *p++ = cpu_to_be32(2); - - *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); - *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); + *p++ = cpu_to_be32(attrlen); + for (i = 0; i < attrlen; i++) + *p++ = cpu_to_be32(attrs[i]); memcpy(verf, readdir->verifier.data, sizeof(verf)); - dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", + + dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n", __func__, (unsigned long long)readdir->cookie, verf[0], verf[1], attrs[0] & readdir->bitmask[0], - attrs[1] & readdir->bitmask[1]); + attrs[1] & readdir->bitmask[1], + attrs[2] & readdir->bitmask[2]); } static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1638,7 +1675,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs { encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); - encode_attrs(xdr, arg->iap, server); + encode_attrs(xdr, arg->iap, arg->label, server); } static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) @@ -1665,13 +1702,13 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 encode_nfs4_verifier(xdr, &arg->confirm); } -static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); - encode_open_stateid(xdr, args->context, args->lock_context, - FMODE_WRITE, hdr->minorversion); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); @@ -1709,6 +1746,14 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr, *p = 0; /* use_conn_in_rdma_mode = False */ } +static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) +{ + unsigned int i; + encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS); + for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) + encode_uint32(xdr, op_map->u.words[i]); +} + static void encode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_args *args, struct compound_hdr *hdr) @@ -1722,9 +1767,20 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_string(xdr, args->id_len, args->id); - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(args->flags); - *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ + encode_uint32(xdr, args->flags); + encode_uint32(xdr, args->state_protect.how); + + switch (args->state_protect.how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + encode_op_map(xdr, &args->state_protect.enforce); + encode_op_map(xdr, &args->state_protect.allow); + break; + default: + WARN_ON_ONCE(1); + break; + } if (send_implementation_id && sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && @@ -1735,7 +1791,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, utsname()->version, utsname()->machine); if (len > 0) { - *p = cpu_to_be32(1); /* implementation id array length=1 */ + encode_uint32(xdr, 1); /* implementation id array length=1 */ encode_string(xdr, sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, @@ -1746,7 +1802,7 @@ static void encode_exchange_id(struct xdr_stream *xdr, p = xdr_encode_hyper(p, 0); *p = cpu_to_be32(0); } else - *p = cpu_to_be32(0); /* implementation id array length=0 */ + encode_uint32(xdr, 0); /* implementation id array length=0 */ } static void encode_create_session(struct xdr_stream *xdr, @@ -1799,7 +1855,7 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */ /* authsys_parms rfc1831 */ - *p++ = (__be32)nn->boot_time.tv_nsec; /* stamp */ + *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ p = xdr_encode_opaque(p, machine_name, len); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ @@ -1841,11 +1897,10 @@ static void encode_sequence(struct xdr_stream *xdr, struct nfs4_slot *slot = args->sa_slot; __be32 *p; - if (slot == NULL) - return; - tp = slot->table; session = tp->session; + if (!session) + return; encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); @@ -1901,7 +1956,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr, p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(args->pdev->layout_type); - *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ + *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ *p++ = cpu_to_be32(0); /* bitmap length 0 */ } @@ -2015,7 +2070,7 @@ static void encode_free_stateid(struct xdr_stream *xdr, struct compound_hdr *hdr) { encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); } #endif /* CONFIG_NFS_V4_1 */ @@ -2026,9 +2081,9 @@ static void encode_free_stateid(struct xdr_stream *xdr, static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) { #if defined(CONFIG_NFS_V4_1) - - if (args->sa_slot) - return args->sa_slot->table->session->clp->cl_mvops->minor_version; + struct nfs4_session *session = args->sa_slot->table->session; + if (session) + return session->clp->cl_mvops->minor_version; #endif /* CONFIG_NFS_V4_1 */ return 0; } @@ -2398,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a READ request */ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2460,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a WRITE request */ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2609,12 +2664,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, struct compound_hdr hdr = { .nops = 0, }; - const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; encode_compound_hdr(xdr, req, &hdr); encode_setclientid_confirm(xdr, arg, &hdr); - encode_putrootfh(xdr, &hdr); - encode_fsinfo(xdr, lease_bitmap, &hdr); encode_nops(&hdr); } @@ -2651,11 +2703,20 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->dir_fh, &hdr); - encode_lookup(xdr, args->name, &hdr); - replen = hdr.replen; /* get the attribute into args->page */ - encode_fs_locations(xdr, args->bitmask, &hdr); + if (args->migration) { + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen; + encode_fs_locations(xdr, args->bitmask, &hdr); + if (args->renew) + encode_renew(xdr, args->clientid, &hdr); + } else { + encode_putfh(xdr, args->dir_fh, &hdr); + encode_lookup(xdr, args->name, &hdr); + replen = hdr.replen; + encode_fs_locations(xdr, args->bitmask, &hdr); + } + /* Set up reply kvec to capture returned fs_locations array. */ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); @@ -2679,6 +2740,26 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode FSID_PRESENT request + */ +static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_fsid_present_arg *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getfh(xdr, &hdr); + if (args->renew) + encode_renew(xdr, args->clientid, &hdr); + encode_nops(&hdr); +} + #if defined(CONFIG_NFS_V4_1) /* * BIND_CONN_TO_SESSION request @@ -3017,7 +3098,8 @@ out_overflow: return -EIO; } -static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, + int *nfs_retval) { __be32 *p; uint32_t opnum; @@ -3027,19 +3109,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) if (unlikely(!p)) goto out_overflow; opnum = be32_to_cpup(p++); - if (opnum != expected) { - dprintk("nfs: Server returned operation" - " %d but we issued a request for %d\n", - opnum, expected); - return -EIO; - } + if (unlikely(opnum != expected)) + goto out_bad_operation; nfserr = be32_to_cpup(p); - if (nfserr != NFS_OK) - return nfs4_stat_to_errno(nfserr); - return 0; + if (nfserr == NFS_OK) + *nfs_retval = 0; + else + *nfs_retval = nfs4_stat_to_errno(nfserr); + return true; +out_bad_operation: + dprintk("nfs: Server returned operation" + " %d but we issued a request for %d\n", + opnum, expected); + *nfs_retval = -EREMOTEIO; + return false; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + *nfs_retval = -EIO; + return false; +} + +static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + int retval; + + __decode_op_hdr(xdr, expected, &retval); + return retval; } /* Dummy routine */ @@ -3355,7 +3450,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint { __be32 *p; - *res = ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL; + *res = 0; if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { @@ -3497,8 +3592,11 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) if (n == 0) goto root_path; dprintk("pathname4: "); - path->ncomponents = 0; - while (path->ncomponents < n) { + if (n > NFS4_PATHNAME_MAXCOMPONENTS) { + dprintk("cannot parse %d components in path\n", n); + goto out_eio; + } + for (path->ncomponents = 0; path->ncomponents < n; path->ncomponents++) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) @@ -3507,12 +3605,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) pr_cont("%s%.*s ", (path->ncomponents != n ? "/ " : ""), component->len, component->data); - if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) - path->ncomponents++; - else { - dprintk("cannot parse %d components in path\n", n); - goto out_eio; - } } out: return status; @@ -3557,27 +3649,23 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st n = be32_to_cpup(p); if (n <= 0) goto out_eio; - res->nlocations = 0; - while (res->nlocations < n) { + for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; - struct nfs4_fs_location *loc = &res->locations[res->nlocations]; + struct nfs4_fs_location *loc; + if (res->nlocations == NFS4_FS_LOCATIONS_MAXENTRIES) + break; + loc = &res->locations[res->nlocations]; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; m = be32_to_cpup(p); - loc->nservers = 0; dprintk("%s: servers:\n", __func__); - while (loc->nservers < m) { - struct nfs4_string *server = &loc->servers[loc->nservers]; - status = decode_opaque_inline(xdr, &server->len, &server->data); - if (unlikely(status != 0)) - goto out_eio; - dprintk("%s ", server->data); - if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS) - loc->nservers++; - else { + for (loc->nservers = 0; loc->nservers < m; loc->nservers++) { + struct nfs4_string *server; + + if (loc->nservers == NFS4_FS_LOCATION_MAXSERVERS) { unsigned int i; dprintk("%s: using first %u of %u servers " "returned for location %u\n", @@ -3591,13 +3679,17 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(status != 0)) goto out_eio; } + break; } + server = &loc->servers[loc->nservers]; + status = decode_opaque_inline(xdr, &server->len, &server->data); + if (unlikely(status != 0)) + goto out_eio; + dprintk("%s ", server->data); } status = decode_pathname(xdr, &loc->rootpath); if (unlikely(status != 0)) goto out_eio; - if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) - res->nlocations++; } if (res->nlocations != 0) status = NFS_ATTR_FATTR_V4_LOCATIONS; @@ -3778,14 +3870,14 @@ out_overflow: } static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, - const struct nfs_server *server, uint32_t *uid, + const struct nfs_server *server, kuid_t *uid, struct nfs4_string *owner_name) { uint32_t len; __be32 *p; int ret = 0; - *uid = -2; + *uid = make_kuid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { @@ -3813,7 +3905,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, __func__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER; } - dprintk("%s: uid=%d\n", __func__, (int)*uid); + dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid)); return ret; out_overflow: print_overflow_msg(__func__, xdr); @@ -3821,14 +3913,14 @@ out_overflow: } static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, - const struct nfs_server *server, uint32_t *gid, + const struct nfs_server *server, kgid_t *gid, struct nfs4_string *group_name) { uint32_t len; __be32 *p; int ret = 0; - *gid = -2; + *gid = make_kgid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) return -EIO; if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { @@ -3856,7 +3948,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, __func__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; } - dprintk("%s: gid=%d\n", __func__, (int)*gid); + dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid)); return ret; out_overflow: print_overflow_msg(__func__, xdr); @@ -4056,6 +4148,56 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, return status; } +static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_label *label) +{ + uint32_t pi = 0; + uint32_t lfs = 0; + __u32 len; + __be32 *p; + int status = 0; + + if (unlikely(bitmap[2] & (FATTR4_WORD2_SECURITY_LABEL - 1U))) + return -EIO; + if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + lfs = be32_to_cpup(p++); + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + pi = be32_to_cpup(p++); + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + len = be32_to_cpup(p++); + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + goto out_overflow; + if (len < NFS4_MAXLABELLEN) { + if (label) { + memcpy(label->label, p, len); + label->len = len; + label->pi = pi; + label->lfs = lfs; + status = NFS_ATTR_FATTR_V4_SECURITY_LABEL; + } + bitmap[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + } else + printk(KERN_WARNING "%s: label too long (%u)!\n", + __func__, len); + } + if (label && label->label) + dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, + (char *)label->label, label->len, label->pi, label->lfs); + return status; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) { int status = 0; @@ -4398,7 +4540,7 @@ out_overflow: static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, - struct nfs4_fs_locations *fs_loc, + struct nfs4_fs_locations *fs_loc, struct nfs4_label *label, const struct nfs_server *server) { int status; @@ -4506,6 +4648,13 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, if (status < 0) goto xdr_error; + if (label) { + status = decode_attr_security_label(xdr, bitmap, label); + if (status < 0) + goto xdr_error; + fattr->valid |= status; + } + xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); return status; @@ -4513,7 +4662,7 @@ xdr_error: static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, - const struct nfs_server *server) + struct nfs4_label *label, const struct nfs_server *server) { unsigned int savep; uint32_t attrlen, @@ -4532,7 +4681,8 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, + label, server); if (status < 0) goto xdr_error; @@ -4542,10 +4692,16 @@ xdr_error: return status; } +static int decode_getfattr_label(struct xdr_stream *xdr, struct nfs_fattr *fattr, + struct nfs4_label *label, const struct nfs_server *server) +{ + return decode_getfattr_generic(xdr, fattr, NULL, NULL, label, server); +} + static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, NULL, NULL, server); } /* @@ -4555,7 +4711,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, uint32_t *layouttype) { - uint32_t *p; + __be32 *p; int num; p = xdr_inline_decode(xdr, 4); @@ -4860,11 +5016,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) uint32_t savewords, bmlen, i; int status; - status = decode_op_hdr(xdr, OP_OPEN); - if (status != -EIO) - nfs_increment_open_seqid(status, res->seqid); - if (!status) - status = decode_stateid(xdr, &res->stateid); + if (!__decode_op_hdr(xdr, OP_OPEN, &status)) + return status; + nfs_increment_open_seqid(status, res->seqid); + if (status) + return status; + status = decode_stateid(xdr, &res->stateid); if (unlikely(status)) return status; @@ -4930,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_PUTROOTFH); } -static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) +static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs_pgio_res *res) { __be32 *p; uint32_t count, eof, recvd; @@ -5184,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); } -static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) { __be32 *p; int status; @@ -5209,27 +5367,30 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } -static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor) +static int decode_secinfo_gss(struct xdr_stream *xdr, + struct nfs4_secinfo4 *flavor) { + u32 oid_len; __be32 *p; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - flavor->gss.sec_oid4.len = be32_to_cpup(p); - if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN) + oid_len = be32_to_cpup(p); + if (oid_len > GSS_OID_MAX_LEN) goto out_err; - p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len); + p = xdr_inline_decode(xdr, oid_len); if (unlikely(!p)) goto out_overflow; - memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len); + memcpy(flavor->flavor_info.oid.data, p, oid_len); + flavor->flavor_info.oid.len = oid_len; p = xdr_inline_decode(xdr, 8); if (unlikely(!p)) goto out_overflow; - flavor->gss.qop4 = be32_to_cpup(p++); - flavor->gss.service = be32_to_cpup(p); + flavor->flavor_info.qop = be32_to_cpup(p++); + flavor->flavor_info.service = be32_to_cpup(p); return 0; @@ -5242,10 +5403,10 @@ out_err: static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) { - struct nfs4_secinfo_flavor *sec_flavor; + struct nfs4_secinfo4 *sec_flavor; + unsigned int i, num_flavors; int status; __be32 *p; - int i, num_flavors; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) @@ -5297,6 +5458,23 @@ static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_re return decode_secinfo_common(xdr, res); } +static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map) +{ + __be32 *p; + uint32_t bitmap_words; + unsigned int i; + + p = xdr_inline_decode(xdr, 4); + bitmap_words = be32_to_cpup(p++); + if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) + return -EIO; + p = xdr_inline_decode(xdr, 4 * bitmap_words); + for (i = 0; i < bitmap_words; i++) + op_map->u.words[i] = be32_to_cpup(p++); + + return 0; +} + static int decode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_res *res) { @@ -5320,10 +5498,22 @@ static int decode_exchange_id(struct xdr_stream *xdr, res->seqid = be32_to_cpup(p++); res->flags = be32_to_cpup(p++); - /* We ask for SP4_NONE */ - dummy = be32_to_cpup(p); - if (dummy != SP4_NONE) + res->state_protect.how = be32_to_cpup(p); + switch (res->state_protect.how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + status = decode_op_map(xdr, &res->state_protect.enforce); + if (status) + return status; + status = decode_op_map(xdr, &res->state_protect.allow); + if (status) + return status; + break; + default: + WARN_ON_ONCE(1); return -EIO; + } /* server_owner4.so_minor_id */ p = xdr_inline_decode(xdr, 8); @@ -5517,6 +5707,8 @@ static int decode_sequence(struct xdr_stream *xdr, if (res->sr_slot == NULL) return 0; + if (!res->sr_slot->table->session) + return 0; status = decode_op_hdr(xdr, OP_SEQUENCE); if (!status) @@ -5835,21 +6027,8 @@ out: static int decode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_res *res) { - __be32 *p; - int status; - - status = decode_op_hdr(xdr, OP_FREE_STATEID); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - res->status = be32_to_cpup(p++); + res->status = decode_op_hdr(xdr, OP_FREE_STATEID); return res->status; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } #endif /* CONFIG_NFS_V4_1 */ @@ -5934,7 +6113,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - status = decode_getfattr(xdr, res->fattr, res->server); + status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); out: return status; } @@ -5960,7 +6139,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, goto out; status = decode_getfh(xdr, res->fh); if (status == 0) - status = decode_getfattr(xdr, res->fattr, res->server); + status = decode_getfattr_label(xdr, res->fattr, + res->label, res->server); out: return status; } @@ -6051,7 +6231,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_restorefh(xdr); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server); + decode_getfattr_label(xdr, res->fattr, res->label, res->server); out: return status; } @@ -6080,7 +6260,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, res->fh); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server); + decode_getfattr_label(xdr, res->fattr, res->label, res->server); out: return status; } @@ -6112,7 +6292,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getfattr(xdr, res->fattr, res->server); + status = decode_getfattr_label(xdr, res->fattr, res->label, res->server); out: return status; } @@ -6245,7 +6425,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, goto out; if (res->access_request) decode_access(xdr, &res->access_supported, &res->access_result); - decode_getfattr(xdr, res->f_attr, res->server); + decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server); out: return status; } @@ -6322,7 +6502,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, status = decode_setattr(xdr); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server); + decode_getfattr_label(xdr, res->fattr, res->label, res->server); out: return status; } @@ -6459,7 +6639,7 @@ out: * Decode Read response */ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_readres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; @@ -6484,7 +6664,7 @@ out: * Decode WRITE response */ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_writeres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; @@ -6648,8 +6828,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, * Decode SETCLIENTID_CONFIRM response */ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs_fsinfo *fsinfo) + struct xdr_stream *xdr) { struct compound_hdr hdr; int status; @@ -6657,10 +6836,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, status = decode_compound_hdr(xdr, &hdr); if (!status) status = decode_setclientid_confirm(xdr); - if (!status) - status = decode_putrootfh(xdr); - if (!status) - status = decode_fsinfo(xdr, fsinfo); return status; } @@ -6710,13 +6885,26 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_putfh(xdr); if (status) goto out; - status = decode_lookup(xdr); - if (status) - goto out; - xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, + if (res->migration) { + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, + &res->fs_locations->fattr, NULL, res->fs_locations, - res->fs_locations->server); + NULL, res->fs_locations->server); + if (status) + goto out; + if (res->renew) + status = decode_renew(xdr); + } else { + status = decode_lookup(xdr); + if (status) + goto out; + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, + &res->fs_locations->fattr, + NULL, res->fs_locations, + NULL, res->fs_locations->server); + } out: return status; } @@ -6745,6 +6933,34 @@ out: return status; } +/* + * Decode FSID_PRESENT response + */ +static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_fsid_present_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_getfh(xdr, res->fh); + if (status) + goto out; + if (res->renew) + status = decode_renew(xdr); +out: + return status; +} + #if defined(CONFIG_NFS_V4_1) /* * Decode BIND_CONN_TO_SESSION response @@ -7129,7 +7345,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - NULL, entry->server) < 0) + NULL, entry->label, entry->server) < 0) goto out_overflow; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; @@ -7259,6 +7475,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), + PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present), #if defined(CONFIG_NFS_V4_1) PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC(CREATE_SESSION, enc_create_session, dec_create_session), diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c new file mode 100644 index 00000000000..4eb0aead69b --- /dev/null +++ b/fs/nfs/nfstrace.c @@ -0,0 +1,9 @@ +/* + * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#include <linux/nfs_fs.h> +#include <linux/namei.h> +#include "internal.h" + +#define CREATE_TRACE_POINTS +#include "nfstrace.h" diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h new file mode 100644 index 00000000000..59f838cdc00 --- /dev/null +++ b/fs/nfs/nfstrace.h @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com> + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs + +#if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS_H + +#include <linux/tracepoint.h> + +#define nfs_show_file_type(ftype) \ + __print_symbolic(ftype, \ + { DT_UNKNOWN, "UNKNOWN" }, \ + { DT_FIFO, "FIFO" }, \ + { DT_CHR, "CHR" }, \ + { DT_DIR, "DIR" }, \ + { DT_BLK, "BLK" }, \ + { DT_REG, "REG" }, \ + { DT_LNK, "LNK" }, \ + { DT_SOCK, "SOCK" }, \ + { DT_WHT, "WHT" }) + +#define nfs_show_cache_validity(v) \ + __print_flags(v, "|", \ + { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \ + { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ + { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ + { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ + { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ + { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ + { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ + { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }) + +#define nfs_show_nfsi_flags(v) \ + __print_flags(v, "|", \ + { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ + { 1 << NFS_INO_STALE, "STALE" }, \ + { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ + { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ + { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ + { 1 << NFS_INO_COMMIT, "COMMIT" }, \ + { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ + { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) + +DECLARE_EVENT_CLASS(nfs_inode_event, + TP_PROTO( + const struct inode *inode + ), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(u64, version) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->version = inode->i_version; + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (unsigned long long)__entry->version + ) +); + +DECLARE_EVENT_CLASS(nfs_inode_event_done, + TP_PROTO( + const struct inode *inode, + int error + ), + + TP_ARGS(inode, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(unsigned char, type) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, size) + __field(unsigned long, nfsi_flags) + __field(unsigned long, cache_validity) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->error = error; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->type = nfs_umode_to_dtype(inode->i_mode); + __entry->version = inode->i_version; + __entry->size = i_size_read(inode); + __entry->nfsi_flags = nfsi->flags; + __entry->cache_validity = nfsi->cache_validity; + ), + + TP_printk( + "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "type=%u (%s) version=%llu size=%lld " + "cache_validity=%lu (%s) nfs_flags=%ld (%s)", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->type, + nfs_show_file_type(__entry->type), + (unsigned long long)__entry->version, + (long long)__entry->size, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity), + __entry->nfsi_flags, + nfs_show_nfsi_flags(__entry->nfsi_flags) + ) +); + +#define DEFINE_NFS_INODE_EVENT(name) \ + DEFINE_EVENT(nfs_inode_event, name, \ + TP_PROTO( \ + const struct inode *inode \ + ), \ + TP_ARGS(inode)) +#define DEFINE_NFS_INODE_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_inode_event_done, name, \ + TP_PROTO( \ + const struct inode *inode, \ + int error \ + ), \ + TP_ARGS(inode, error)) +DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit); +DEFINE_NFS_INODE_EVENT(nfs_getattr_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit); +DEFINE_NFS_INODE_EVENT(nfs_setattr_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit); +DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit); +DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); +DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); +DEFINE_NFS_INODE_EVENT(nfs_access_enter); +DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); + +#define show_lookup_flags(flags) \ + __print_flags((unsigned long)flags, "|", \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_OPEN, "OPEN" }, \ + { LOOKUP_CREATE, "CREATE" }, \ + { LOOKUP_EXCL, "EXCL" }) + +DECLARE_EVENT_CLASS(nfs_lookup_event, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags + ), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->flags, + show_lookup_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_LOOKUP_EVENT(name) \ + DEFINE_EVENT(nfs_lookup_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + unsigned int flags \ + ), \ + TP_ARGS(dir, dentry, flags)) + +DECLARE_EVENT_CLASS(nfs_lookup_event_done, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags, + int error + ), + + TP_ARGS(dir, dentry, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_lookup_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_lookup_event_done, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + unsigned int flags, \ + int error \ + ), \ + TP_ARGS(dir, dentry, flags, error)) + +DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter); +DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); +DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); +DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); + +#define show_open_flags(flags) \ + __print_flags((unsigned long)flags, "|", \ + { O_CREAT, "O_CREAT" }, \ + { O_EXCL, "O_EXCL" }, \ + { O_TRUNC, "O_TRUNC" }, \ + { O_APPEND, "O_APPEND" }, \ + { O_DSYNC, "O_DSYNC" }, \ + { O_DIRECT, "O_DIRECT" }, \ + { O_DIRECTORY, "O_DIRECTORY" }) + +#define show_fmode_flags(mode) \ + __print_flags(mode, "|", \ + { ((__force unsigned long)FMODE_READ), "READ" }, \ + { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \ + { ((__force unsigned long)FMODE_EXEC), "EXEC" }) + +TRACE_EVENT(nfs_atomic_open_enter, + TP_PROTO( + const struct inode *dir, + const struct nfs_open_context *ctx, + unsigned int flags + ), + + TP_ARGS(dir, ctx, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s", + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_atomic_open_exit, + TP_PROTO( + const struct inode *dir, + const struct nfs_open_context *ctx, + unsigned int flags, + int error + ), + + TP_ARGS(dir, ctx, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(unsigned int, fmode) + __field(dev_t, dev) + __field(u64, dir) + __string(name, ctx->dentry->d_name.name) + ), + + TP_fast_assign( + __entry->error = error; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __entry->fmode = (__force unsigned int)ctx->mode; + __assign_str(name, ctx->dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) fmode=%s " + "name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_open_flags(__entry->flags), + show_fmode_flags(__entry->fmode), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_create_enter, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags + ), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->flags, + show_open_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_create_exit, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + unsigned int flags, + int error + ), + + TP_ARGS(dir, dentry, flags, error), + + TP_STRUCT__entry( + __field(int, error) + __field(unsigned int, flags) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->error = error; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->flags = flags; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", + __entry->error, + __entry->flags, + show_open_flags(__entry->flags), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +DECLARE_EVENT_CLASS(nfs_directory_event, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry + ), + + TP_ARGS(dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_DIRECTORY_EVENT(name) \ + DEFINE_EVENT(nfs_directory_event, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry \ + ), \ + TP_ARGS(dir, dentry)) + +DECLARE_EVENT_CLASS(nfs_directory_event_done, + TP_PROTO( + const struct inode *dir, + const struct dentry *dentry, + int error + ), + + TP_ARGS(dir, dentry, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +#define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_directory_event_done, name, \ + TP_PROTO( \ + const struct inode *dir, \ + const struct dentry *dentry, \ + int error \ + ), \ + TP_ARGS(dir, dentry, error)) + +DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit); +DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter); +DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit); + +TRACE_EVENT(nfs_link_enter, + TP_PROTO( + const struct inode *inode, + const struct inode *dir, + const struct dentry *dentry + ), + + TP_ARGS(inode, dir, dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->dir = NFS_FILEID(dir); + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fileid, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +TRACE_EVENT(nfs_link_exit, + TP_PROTO( + const struct inode *inode, + const struct inode *dir, + const struct dentry *dentry, + int error + ), + + TP_ARGS(inode, dir, dentry, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u64, fileid) + __field(u64, dir) + __string(name, dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + __assign_str(name, dentry->d_name.name); + ), + + TP_printk( + "error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->fileid, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); + +DECLARE_EVENT_CLASS(nfs_rename_event, + TP_PROTO( + const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry + ), + + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, old_dir) + __field(u64, new_dir) + __string(old_name, old_dentry->d_name.name) + __string(new_name, new_dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = old_dir->i_sb->s_dev; + __entry->old_dir = NFS_FILEID(old_dir); + __entry->new_dir = NFS_FILEID(new_dir); + __assign_str(old_name, old_dentry->d_name.name); + __assign_str(new_name, new_dentry->d_name.name); + ), + + TP_printk( + "old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->old_dir, + __get_str(old_name), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->new_dir, + __get_str(new_name) + ) +); +#define DEFINE_NFS_RENAME_EVENT(name) \ + DEFINE_EVENT(nfs_rename_event, name, \ + TP_PROTO( \ + const struct inode *old_dir, \ + const struct dentry *old_dentry, \ + const struct inode *new_dir, \ + const struct dentry *new_dentry \ + ), \ + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry)) + +DECLARE_EVENT_CLASS(nfs_rename_event_done, + TP_PROTO( + const struct inode *old_dir, + const struct dentry *old_dentry, + const struct inode *new_dir, + const struct dentry *new_dentry, + int error + ), + + TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, old_dir) + __string(old_name, old_dentry->d_name.name) + __field(u64, new_dir) + __string(new_name, new_dentry->d_name.name) + ), + + TP_fast_assign( + __entry->dev = old_dir->i_sb->s_dev; + __entry->old_dir = NFS_FILEID(old_dir); + __entry->new_dir = NFS_FILEID(new_dir); + __entry->error = error; + __assign_str(old_name, old_dentry->d_name.name); + __assign_str(new_name, new_dentry->d_name.name); + ), + + TP_printk( + "error=%d old_name=%02x:%02x:%llu/%s " + "new_name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->old_dir, + __get_str(old_name), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->new_dir, + __get_str(new_name) + ) +); +#define DEFINE_NFS_RENAME_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_rename_event_done, name, \ + TP_PROTO( \ + const struct inode *old_dir, \ + const struct dentry *old_dentry, \ + const struct inode *new_dir, \ + const struct dentry *new_dentry, \ + int error \ + ), \ + TP_ARGS(old_dir, old_dentry, new_dir, \ + new_dentry, error)) + +DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); +DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); + +DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename); + +TRACE_EVENT(nfs_sillyrename_unlink, + TP_PROTO( + const struct nfs_unlinkdata *data, + int error + ), + + TP_ARGS(data, error), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(u64, dir) + __dynamic_array(char, name, data->args.name.len + 1) + ), + + TP_fast_assign( + struct inode *dir = data->dir; + size_t len = data->args.name.len; + __entry->dev = dir->i_sb->s_dev; + __entry->dir = NFS_FILEID(dir); + __entry->error = error; + memcpy(__get_dynamic_array(name), + data->args.name.name, len); + ((char *)__get_dynamic_array(name))[len] = 0; + ), + + TP_printk( + "error=%d name=%02x:%02x:%llu/%s", + __entry->error, + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->dir, + __get_str(name) + ) +); +#endif /* _TRACE_NFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE nfstrace +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c6f990656f8..611320753db 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -234,7 +234,7 @@ static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, lseg = kzalloc(lseg_size, gfp_flags); if (unlikely(!lseg)) { - dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, + dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__, numdevs, lseg_size); return -ENOMEM; } @@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_read_data *rdata) +int objio_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; @@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_write_data *wdata = objios->oir.rpcdata; + struct nfs_pgio_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; @@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_write_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; @@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) return 0; } -static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (!pnfs_generic_pg_test(pgio, prev, req)) - return false; + unsigned int size; + + size = pnfs_generic_pg_test(pgio, prev, req); + + if (!size || pgio->pg_count + req->wb_bytes > + (unsigned long)pgio->pg_layout_private) + return 0; - return pgio->pg_count + req->wb_bytes <= - (unsigned long)pgio->pg_layout_private; + return min(size, req->wb_bytes); } static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) @@ -647,6 +655,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { .flags = PNFS_LAYOUTRET_ON_SETATTR | PNFS_LAYOUTRET_ON_ERROR, + .owner = THIS_MODULE, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index a9ebd817278..765d3f54e98 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct objlayout *objlay; objlay = kzalloc(sizeof(struct objlayout), gfp_flags); - if (objlay) { - spin_lock_init(&objlay->lock); - INIT_LIST_HEAD(&objlay->err_list); - } + if (!objlay) + return NULL; + spin_lock_init(&objlay->lock); + INIT_LIST_HEAD(&objlay->err_list); dprintk("%s: Return %p\n", __func__, objlay); return &objlay->pnfs_layout; } @@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } @@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work) void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_read_data *rdata = oir->rpcdata; + struct nfs_pgio_data *rdata = oir->rpcdata; oir->status = rdata->task.tk_status = status; if (status >= 0) @@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. */ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_read_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; @@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_write_done(wdata); } @@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work) void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_write_data *wdata = oir->rpcdata; + struct nfs_pgio_data *wdata = oir->rpcdata; oir->status = wdata->task.tk_status = status; if (status >= 0) { @@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_write_data *wdata, +objlayout_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; @@ -613,8 +613,10 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, pd.pgbase = 0; pd.pglen = PAGE_SIZE; pd.mincount = 0; + pd.maxcount = PAGE_SIZE; - err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); + err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, + pnfslay->plh_lc_cred); dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); if (err) goto err_out; diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 880ba086be9..01e041029a6 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -114,13 +114,13 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); -/* objio_free_result will free these @oir structs recieved from +/* objio_free_result will free these @oir structs received from * objlayout_{read,write}_done */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_read_data *rdata); -extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_data *rdata); +extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_read_data *); + struct nfs_pgio_data *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_write_data *, + struct nfs_pgio_data *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e56e846e9d2..17fab89f635 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -24,9 +24,12 @@ #include "internal.h" #include "pnfs.h" +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + static struct kmem_cache *nfs_page_cachep; +static const struct rpc_call_ops nfs_pgio_common_ops; -bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) @@ -84,11 +87,217 @@ nfs_page_free(struct nfs_page *p) kmem_cache_free(nfs_page_cachep, p); } +static void +nfs_iocounter_inc(struct nfs_io_counter *c) +{ + atomic_inc(&c->io_count); +} + +static void +nfs_iocounter_dec(struct nfs_io_counter *c) +{ + if (atomic_dec_and_test(&c->io_count)) { + clear_bit(NFS_IO_INPROGRESS, &c->flags); + smp_mb__after_atomic(); + wake_up_bit(&c->flags, NFS_IO_INPROGRESS); + } +} + +static int +__nfs_iocounter_wait(struct nfs_io_counter *c) +{ + wait_queue_head_t *wq = bit_waitqueue(&c->flags, NFS_IO_INPROGRESS); + DEFINE_WAIT_BIT(q, &c->flags, NFS_IO_INPROGRESS); + int ret = 0; + + do { + prepare_to_wait(wq, &q.wait, TASK_KILLABLE); + set_bit(NFS_IO_INPROGRESS, &c->flags); + if (atomic_read(&c->io_count) == 0) + break; + ret = nfs_wait_bit_killable(&c->flags); + } while (atomic_read(&c->io_count) != 0); + finish_wait(wq, &q.wait); + return ret; +} + +/** + * nfs_iocounter_wait - wait for i/o to complete + * @c: nfs_io_counter to use + * + * returns -ERESTARTSYS if interrupted by a fatal signal. + * Otherwise returns 0 once the io_count hits 0. + */ +int +nfs_iocounter_wait(struct nfs_io_counter *c) +{ + if (atomic_read(&c->io_count) == 0) + return 0; + return __nfs_iocounter_wait(c); +} + +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + +/* + * nfs_page_group_lock - lock the head of the page group + * @req - request in group that is to be locked + * + * this lock must be held if modifying the page group list + */ +void +nfs_page_group_lock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req - request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + smp_mb__before_atomic(); + clear_bit(PG_HEADLOCK, &head->wb_flags); + smp_mb__after_atomic(); + wake_up_bit(&head->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_sync_on_bit_locked + * + * must be called with page group lock held + */ +static bool +nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +{ + struct nfs_page *head = req->wb_head; + struct nfs_page *tmp; + + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); + WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); + + tmp = req->wb_this_page; + while (tmp != req) { + if (!test_bit(bit, &tmp->wb_flags)) + return false; + tmp = tmp->wb_this_page; + } + + /* true! reset all bits */ + tmp = req; + do { + clear_bit(bit, &tmp->wb_flags); + tmp = tmp->wb_this_page; + } while (tmp != req); + + return true; +} + +/* + * nfs_page_group_sync_on_bit - set bit on current request, but only + * return true if the bit is set for all requests in page group + * @req - request in page group + * @bit - PG_* bit that is used to sync page group + */ +bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) +{ + bool ret; + + nfs_page_group_lock(req); + ret = nfs_page_group_sync_on_bit_locked(req, bit); + nfs_page_group_unlock(req); + + return ret; +} + +/* + * nfs_page_group_init - Initialize the page group linkage for @req + * @req - a new nfs request + * @prev - the previous request in page group, or NULL if @req is the first + * or only request in the group (the head). + */ +static inline void +nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) +{ + WARN_ON_ONCE(prev == req); + + if (!prev) { + /* a head request */ + req->wb_head = req; + req->wb_this_page = req; + } else { + /* a subrequest */ + WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); + req->wb_head = prev->wb_head; + req->wb_this_page = prev->wb_this_page; + prev->wb_this_page = req; + + /* All subrequests take a ref on the head request until + * nfs_page_group_destroy is called */ + kref_get(&req->wb_head->wb_kref); + + /* grab extra ref if head request has extra ref from + * the write/commit path to handle handoff between write + * and commit lists */ + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + set_bit(PG_INODE_REF, &req->wb_flags); + kref_get(&req->wb_kref); + } + } +} + +/* + * nfs_page_group_destroy - sync the destruction of page groups + * @req - request that no longer needs the page group + * + * releases the page group reference from each member once all + * members have called this function. + */ +static void +nfs_page_group_destroy(struct kref *kref) +{ + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *tmp, *next; + + /* subrequests must release the ref on the head request */ + if (req->wb_head != req) + nfs_release_request(req->wb_head); + + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) + return; + + tmp = req; + do { + next = tmp->wb_this_page; + /* unlink and free */ + tmp->wb_this_page = tmp; + tmp->wb_head = tmp; + nfs_free_request(tmp); + tmp = next; + } while (tmp != req); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use - * @inode: inode to which the request is attached * @page: page to write + * @last: last nfs request created for this page group or NULL if head * @offset: starting offset within the page for the write * @count: number of bytes to read/write * @@ -97,13 +306,15 @@ nfs_page_free(struct nfs_page *p) * User should ensure it is safe to sleep in this function. */ struct nfs_page * -nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, - struct page *page, - unsigned int offset, unsigned int count) +nfs_create_request(struct nfs_open_context *ctx, struct page *page, + struct nfs_page *last, unsigned int offset, + unsigned int count) { struct nfs_page *req; struct nfs_lock_context *l_ctx; + if (test_bit(NFS_CONTEXT_BAD, &ctx->flags)) + return ERR_PTR(-EBADF); /* try to allocate the request struct */ req = nfs_page_alloc(); if (req == NULL) @@ -116,6 +327,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, return ERR_CAST(l_ctx); } req->wb_lock_context = l_ctx; + nfs_iocounter_inc(&l_ctx->io_count); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in @@ -128,6 +340,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + nfs_page_group_init(req, last); return req; } @@ -141,9 +354,9 @@ void nfs_unlock_request(struct nfs_page *req) printk(KERN_ERR "NFS: Invalid unlock attempted\n"); BUG(); } - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(PG_BUSY, &req->wb_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&req->wb_flags, PG_BUSY); } @@ -175,6 +388,7 @@ static void nfs_clear_request(struct nfs_page *req) req->wb_page = NULL; } if (l_ctx != NULL) { + nfs_iocounter_dec(&l_ctx->io_count); nfs_put_lock_context(l_ctx); req->wb_lock_context = NULL; } @@ -184,16 +398,22 @@ static void nfs_clear_request(struct nfs_page *req) } } - /** * nfs_release_request - Release the count on an NFS read/write request * @req: request to release * * Note: Should never be called with the spinlock held! */ -static void nfs_free_request(struct kref *kref) +void nfs_free_request(struct nfs_page *req) { - struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + WARN_ON_ONCE(req->wb_this_page != req); + + /* extra debug: make sure no sync bits are still set */ + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); @@ -202,13 +422,7 @@ static void nfs_free_request(struct kref *kref) void nfs_release_request(struct nfs_page *req) { - kref_put(&req->wb_kref, nfs_free_request); -} - -static int nfs_wait_bit_uninterruptible(void *word) -{ - io_schedule(); - return 0; + kref_put(&req->wb_kref, nfs_page_group_destroy); } /** @@ -226,22 +440,249 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +/* + * nfs_generic_pg_test - determine if requests can be coalesced + * @desc: pointer to descriptor + * @prev: previous request in desc, or NULL + * @req: this request + * + * Returns zero if @req can be coalesced into @desc, otherwise it returns + * the size of the request. + */ +size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, struct nfs_page *req) { - /* - * FIXME: ideally we should be able to coalesce all requests - * that are not block boundary aligned, but currently this - * is problematic for the case of bsize < PAGE_CACHE_SIZE, - * since nfs_flush_multi and nfs_pagein_multi assume you - * can have only one struct nfs_page. - */ - if (desc->pg_bsize < PAGE_SIZE) + if (desc->pg_count > desc->pg_bsize) { + /* should never happen */ + WARN_ON_ONCE(1); return 0; + } - return desc->pg_count + req->wb_bytes <= desc->pg_bsize; + return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); +static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) +{ + return container_of(hdr, struct nfs_rw_header, header); +} + +/** + * nfs_rw_header_alloc - Allocate a header for a read or write + * @ops: Read or write function vector + */ +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) +{ + struct nfs_rw_header *header = ops->rw_alloc_header(); + + if (header) { + struct nfs_pgio_header *hdr = &header->header; + + INIT_LIST_HEAD(&hdr->pages); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); + hdr->rw_ops = ops; + } + return header; +} +EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); + +/* + * nfs_rw_header_free - Free a read or write header + * @hdr: The header to free + */ +void nfs_rw_header_free(struct nfs_pgio_header *hdr) +{ + hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); +} +EXPORT_SYMBOL_GPL(nfs_rw_header_free); + +/** + * nfs_pgio_data_alloc - Allocate pageio data + * @hdr: The header making a request + * @pagecount: Number of pages to create + */ +static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_pgio_data *data, *prealloc; + + prealloc = &NFS_RW_HEADER(hdr)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { + data->header = hdr; + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; + } +out: + return data; +} + +/** + * nfs_pgio_data_release - Properly free pageio data + * @data: The data to release + */ +void nfs_pgio_data_release(struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); + + put_nfs_open_context(data->args.context); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); + if (data == &pageio_header->rpc_data) { + data->header = NULL; + data = NULL; + } + if (atomic_dec_and_test(&hdr->refcnt)) + hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(data); +} +EXPORT_SYMBOL_GPL(nfs_pgio_data_release); + +/** + * nfs_pgio_rpcsetup - Set up arguments for a pageio call + * @data: The pageio data + * @count: Number of bytes to read + * @offset: Initial offset + * @how: How to commit data (writes only) + * @cinfo: Commit information for the call (writes only) + */ +static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, + unsigned int count, unsigned int offset, + int how, struct nfs_commit_info *cinfo) +{ + struct nfs_page *req = data->header->req; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. */ + + data->args.fh = NFS_FH(data->header->inode); + data->args.offset = req_offset(req) + offset; + /* pnfs_set_layoutcommit needs this */ + data->mds_offset = data->args.offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pages.pagevec; + data->args.count = count; + data->args.context = get_nfs_open_context(req->wb_context); + data->args.lock_context = req->wb_lock_context; + data->args.stable = NFS_UNSTABLE; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_reqs_to_commit(cinfo)) + break; + default: + data->args.stable = NFS_FILE_SYNC; + } + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); +} + +/** + * nfs_pgio_prepare - Prepare pageio data to go over the wire + * @task: The current task + * @calldata: pageio data to prepare + */ +static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + int err; + err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + if (err) + rpc_exit(task, err); +} + +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, + const struct rpc_call_ops *call_ops, int how, int flags) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->header->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC | flags, + }; + int ret = 0; + + data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + + dprintk("NFS: %5u initiated pgio call " + "(req %s/%llu, %u bytes @ offset %llu)\n", + data->task.tk_pid, + data->header->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(data->header->inode), + data->args.count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } + rpc_put_task(task); +out: + return ret; +} +EXPORT_SYMBOL_GPL(nfs_initiate_pgio); + +/** + * nfs_pgio_error - Clean up from a pageio error + * @desc: IO descriptor + * @hdr: pageio header + */ +static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + set_bit(NFS_IOHDR_REDO, &hdr->flags); + nfs_pgio_data_release(hdr->data); + hdr->data = NULL; + desc->pg_completion_ops->error_cleanup(&desc->pg_list); + return -ENOMEM; +} + +/** + * nfs_pgio_release - Release pageio data + * @calldata: The pageio data to release + */ +static void nfs_pgio_release(void *calldata) +{ + struct nfs_pgio_data *data = calldata; + if (data->header->rw_ops->rw_release) + data->header->rw_ops->rw_release(data); + nfs_pgio_data_release(data); +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -254,6 +695,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, const struct nfs_pgio_completion_ops *compl_ops, + const struct nfs_rw_ops *rw_ops, size_t bsize, int io_flags) { @@ -267,6 +709,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; + desc->pg_rw_ops = rw_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; @@ -276,6 +719,107 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, EXPORT_SYMBOL_GPL(nfs_pageio_init); /** + * nfs_pgio_result - Basic pageio error handling + * @task: The task that ran + * @calldata: Pageio data to check + */ +static void nfs_pgio_result(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + struct inode *inode = data->header->inode; + + dprintk("NFS: %s: %5u, (status %d)\n", __func__, + task->tk_pid, task->tk_status); + + if (data->header->rw_ops->rw_done(task, data, inode) != 0) + return; + if (task->tk_status < 0) + nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + else + data->header->rw_ops->rw_result(task, data); +} + +/* + * Create an RPC task for the given read or write request and kick it. + * The page must have been locked by the caller. + * + * It may happen that the page we're passed is not marked dirty. + * This is the case if nfs_updatepage detects a conflicting request + * that has been written but not committed. + */ +int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_pgio_data *data; + struct list_head *head = &desc->pg_list; + struct nfs_commit_info cinfo; + + data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) + return nfs_pgio_error(desc, hdr); + + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); + pages = data->pages.pagevec; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); + *pages++ = req->wb_page; + } + + if ((desc->pg_ioflags & FLUSH_COND_STABLE) && + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) + desc->pg_ioflags &= ~FLUSH_COND_STABLE; + + /* Set up the argument struct */ + nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + hdr->data = data; + desc->pg_rpc_callops = &nfs_pgio_common_ops; + return 0; +} +EXPORT_SYMBOL_GPL(nfs_generic_pgio); + +static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) +{ + struct nfs_rw_header *rw_hdr; + struct nfs_pgio_header *hdr; + int ret; + + rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); + if (!rw_hdr) { + desc->pg_completion_ops->error_cleanup(&desc->pg_list); + return -ENOMEM; + } + hdr = &rw_hdr->header; + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pgio(desc, hdr); + if (ret == 0) + ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), + hdr->data, desc->pg_rpc_callops, + desc->pg_ioflags, 0); + if (atomic_dec_and_test(&hdr->refcnt)) + hdr->completion_ops->completion(hdr); + return ret; +} + +static bool nfs_match_open_context(const struct nfs_open_context *ctx1, + const struct nfs_open_context *ctx2) +{ + return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; +} + +static bool nfs_match_lock_context(const struct nfs_lock_context *l1, + const struct nfs_lock_context *l2) +{ + return l1->lockowner.l_owner == l2->lockowner.l_owner + && l1->lockowner.l_pid == l2->lockowner.l_pid; +} + +/** * nfs_can_coalesce_requests - test two requests for compatibility * @prev: pointer to nfs_page * @req: pointer to nfs_page @@ -290,21 +834,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_page *req, struct nfs_pageio_descriptor *pgio) { - if (req->wb_context->cred != prev->wb_context->cred) - return false; - if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner) - return false; - if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid) - return false; - if (req->wb_context->state != prev->wb_context->state) - return false; - if (req->wb_pgbase != 0) - return false; - if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return false; - if (req_offset(req) != req_offset(prev) + prev->wb_bytes) - return false; - return pgio->pg_ops->pg_test(pgio, prev, req); + size_t size; + + if (prev) { + if (!nfs_match_open_context(req->wb_context, prev->wb_context)) + return false; + if (req->wb_context->dentry->d_inode->i_flock != NULL && + !nfs_match_lock_context(req->wb_lock_context, + prev->wb_lock_context)) + return false; + if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + return false; + } + size = pgio->pg_ops->pg_test(pgio, prev, req); + WARN_ON_ONCE(size > req->wb_bytes); + if (size && size < req->wb_bytes) + req->wb_bytes = size; + return size > 0; } /** @@ -318,17 +864,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_page *prev = NULL; if (desc->pg_count != 0) { - struct nfs_page *prev; - prev = nfs_list_entry(desc->pg_list.prev); - if (!nfs_can_coalesce_requests(prev, req, desc)) - return 0; } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } + if (!nfs_can_coalesce_requests(prev, req, desc)) + return 0; nfs_list_remove_request(req); nfs_list_add_request(req, &desc->pg_list); desc->pg_count += req->wb_bytes; @@ -358,22 +903,72 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * @desc: destination io descriptor * @req: request * + * This may split a request into subrequests which are all part of the + * same page group. + * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - while (!nfs_pageio_do_add_request(desc, req)) { - desc->pg_moreio = 1; - nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - desc->pg_moreio = 0; - if (desc->pg_recoalesce) - return 0; - } + struct nfs_page *subreq; + unsigned int bytes_left = 0; + unsigned int offset, pgbase; + + nfs_page_group_lock(req); + + subreq = req; + bytes_left = subreq->wb_bytes; + offset = subreq->wb_offset; + pgbase = subreq->wb_pgbase; + + do { + if (!nfs_pageio_do_add_request(desc, subreq)) { + /* make sure pg_test call(s) did nothing */ + WARN_ON_ONCE(subreq->wb_bytes != bytes_left); + WARN_ON_ONCE(subreq->wb_offset != offset); + WARN_ON_ONCE(subreq->wb_pgbase != pgbase); + + nfs_page_group_unlock(req); + desc->pg_moreio = 1; + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + if (desc->pg_recoalesce) + return 0; + /* retry add_request for this subreq */ + nfs_page_group_lock(req); + continue; + } + + /* check for buggy pg_test call(s) */ + WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); + WARN_ON_ONCE(subreq->wb_bytes > bytes_left); + WARN_ON_ONCE(subreq->wb_bytes == 0); + + bytes_left -= subreq->wb_bytes; + offset += subreq->wb_bytes; + pgbase += subreq->wb_bytes; + + if (bytes_left) { + subreq = nfs_create_request(req->wb_context, + req->wb_page, + subreq, pgbase, bytes_left); + if (IS_ERR(subreq)) + goto err_ptr; + nfs_lock_request(subreq); + subreq->wb_offset = offset; + subreq->wb_index = req->wb_index; + } + } while (bytes_left > 0); + + nfs_page_group_unlock(req); return 1; +err_ptr: + desc->pg_error = PTR_ERR(subreq); + nfs_page_group_unlock(req); + return 0; } static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) @@ -386,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) desc->pg_count = 0; desc->pg_base = 0; desc->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { struct nfs_page *req; @@ -472,3 +1068,13 @@ void nfs_destroy_nfspagecache(void) kmem_cache_destroy(nfs_page_cachep); } +static const struct rpc_call_ops nfs_pgio_common_ops = { + .rpc_call_prepare = nfs_pgio_prepare, + .rpc_call_done = nfs_pgio_result, + .rpc_release = nfs_pgio_release, +}; + +const struct nfs_pageio_ops nfs_pgio_rw_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_pgios, +}; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d00260b0810..6fdcd233d6f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -33,6 +33,7 @@ #include "internal.h" #include "pnfs.h" #include "iostat.h" +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -360,7 +361,7 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -static inline u64 +static u64 end_offset(u64 start, u64 len) { u64 end; @@ -376,9 +377,9 @@ end_offset(u64 start, u64 len) * start2 end2 * [----------------) */ -static inline int -lo_seg_contained(struct pnfs_layout_range *l1, - struct pnfs_layout_range *l2) +static bool +pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, + const struct pnfs_layout_range *l2) { u64 start1 = l1->offset; u64 end1 = end_offset(start1, l1->length); @@ -395,9 +396,9 @@ lo_seg_contained(struct pnfs_layout_range *l1, * start2 end2 * [----------------) */ -static inline int -lo_seg_intersecting(struct pnfs_layout_range *l1, - struct pnfs_layout_range *l2) +static bool +pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, + const struct pnfs_layout_range *l2) { u64 start1 = l1->offset; u64 end1 = end_offset(start1, l1->length); @@ -409,12 +410,22 @@ lo_seg_intersecting(struct pnfs_layout_range *l1, } static bool -should_free_lseg(struct pnfs_layout_range *lseg_range, - struct pnfs_layout_range *recall_range) +should_free_lseg(const struct pnfs_layout_range *lseg_range, + const struct pnfs_layout_range *recall_range) { return (recall_range->iomode == IOMODE_ANY || lseg_range->iomode == recall_range->iomode) && - lo_seg_intersecting(lseg_range, recall_range); + pnfs_lseg_range_intersecting(lseg_range, recall_range); +} + +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + if (!atomic_dec_and_test(&lseg->pls_refcount)) + return false; + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); + list_add(&lseg->pls_list, tmp_list); + return true; } /* Returns 1 if lseg is removed from list, 0 otherwise */ @@ -430,11 +441,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - pnfs_layout_remove_lseg(lseg->pls_layout, lseg); - list_add(&lseg->pls_list, tmp_list); + if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; - } } return rv; } @@ -505,37 +513,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); -/* - * Called by the state manger to remove all layouts established under an - * expired lease. - */ -void -pnfs_destroy_all_layouts(struct nfs_client *clp) +static bool +pnfs_layout_add_bulk_destroy_list(struct inode *inode, + struct list_head *layout_list) { - struct nfs_server *server; struct pnfs_layout_hdr *lo; - LIST_HEAD(tmp_list); + bool ret = false; - nfs4_deviceid_mark_client_invalid(clp); - nfs4_deviceid_purge_client(clp); + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { + pnfs_get_layout_hdr(lo); + list_add(&lo->plh_bulk_destroy, layout_list); + ret = true; + } + spin_unlock(&inode->i_lock); + return ret; +} + +/* Caller must hold rcu_read_lock and clp->cl_lock */ +static int +pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, + struct nfs_server *server, + struct list_head *layout_list) +{ + struct pnfs_layout_hdr *lo, *next; + struct inode *inode; + + list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { + inode = igrab(lo->plh_inode); + if (inode == NULL) + continue; + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + spin_lock(&clp->cl_lock); + rcu_read_lock(); + return -EAGAIN; + } + return 0; +} + +static int +pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, + bool is_bulk_recall) +{ + struct pnfs_layout_hdr *lo; + struct inode *inode; + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(lseg_list); + int ret = 0; + + while (!list_empty(layout_list)) { + lo = list_entry(layout_list->next, struct pnfs_layout_hdr, + plh_bulk_destroy); + dprintk("%s freeing layout for inode %lu\n", __func__, + lo->plh_inode->i_ino); + inode = lo->plh_inode; + spin_lock(&inode->i_lock); + list_del_init(&lo->plh_bulk_destroy); + lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ + if (is_bulk_recall) + set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range)) + ret = -EAGAIN; + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&lseg_list); + pnfs_put_layout_hdr(lo); + iput(inode); + } + return ret; +} + +int +pnfs_destroy_layouts_byfsid(struct nfs_client *clp, + struct nfs_fsid *fsid, + bool is_recall) +{ + struct nfs_server *server; + LIST_HEAD(layout_list); spin_lock(&clp->cl_lock); rcu_read_lock(); +restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (!list_empty(&server->layouts)) - list_splice_init(&server->layouts, &tmp_list); + if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) + continue; + if (pnfs_layout_bulk_destroy_byserver_locked(clp, + server, + &layout_list) != 0) + goto restart; } rcu_read_unlock(); spin_unlock(&clp->cl_lock); - while (!list_empty(&tmp_list)) { - lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, - plh_layouts); - dprintk("%s freeing layout for inode %lu\n", __func__, - lo->plh_inode->i_ino); - list_del_init(&lo->plh_layouts); - pnfs_destroy_layout(NFS_I(lo->plh_inode)); + if (list_empty(&layout_list)) + return 0; + return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); +} + +int +pnfs_destroy_layouts_byclid(struct nfs_client *clp, + bool is_recall) +{ + struct nfs_server *server; + LIST_HEAD(layout_list); + + spin_lock(&clp->cl_lock); + rcu_read_lock(); +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (pnfs_layout_bulk_destroy_byserver_locked(clp, + server, + &layout_list) != 0) + goto restart; } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + + if (list_empty(&layout_list)) + return 0; + return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); +} + +/* + * Called by the state manger to remove all layouts established under an + * expired lease. + */ +void +pnfs_destroy_all_layouts(struct nfs_client *clp) +{ + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); + + pnfs_destroy_layouts_byclid(clp, false); } /* @@ -544,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) */ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) { - return (s32)s1 - (s32)s2 > 0; + return (s32)(s1 - s2) > 0; +} + +static void +pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, + const nfs4_stateid *new, + struct list_head *free_me_list) +{ + if (nfs4_stateid_match_other(&lo->plh_stateid, new)) + return; + /* Layout is new! Kill existing layout segments */ + pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); } /* update lo->plh_stateid with new if is more recent */ @@ -601,6 +730,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, spin_lock(&lo->plh_inode->i_lock); if (pnfs_layoutgets_blocked(lo, 1)) { status = -EAGAIN; + } else if (!nfs4_valid_open_stateid(open_state)) { + status = -EBADF; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -647,6 +778,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); lgp->gfp_flags = gfp_flags; + lgp->cred = lo->plh_lc_cred; /* Synchronously retrieve layout information from server and * store in lseg. @@ -667,6 +799,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } +static void pnfs_clear_layoutcommit(struct inode *inode, + struct list_head *head) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg, *tmp; + + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + return; + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { + if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + continue; + pnfs_lseg_dec_and_remove_zero(lseg, head); + } +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. @@ -698,6 +845,7 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); + pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { @@ -710,8 +858,6 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; @@ -727,6 +873,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.inode = ino; lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; + lrp->cred = lo->plh_lc_cred; status = nfs4_proc_layoutreturn(lrp); out: @@ -735,6 +882,33 @@ out: } EXPORT_SYMBOL_GPL(_pnfs_return_layout); +int +pnfs_commit_and_return_layout(struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + int ret; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo == NULL) { + spin_unlock(&inode->i_lock); + return 0; + } + pnfs_get_layout_hdr(lo); + /* Block new layoutgets and read/write to ds */ + lo->plh_block_lgets++; + spin_unlock(&inode->i_lock); + filemap_fdatawait(inode->i_mapping); + ret = pnfs_layoutcommit_inode(inode, true); + if (ret == 0) + ret = _pnfs_return_layout(inode); + spin_lock(&inode->i_lock); + lo->plh_block_lgets--; + spin_unlock(&inode->i_lock); + pnfs_put_layout_hdr(lo); + return ret; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; @@ -824,8 +998,8 @@ out: * are seen first. */ static s64 -cmp_layout(struct pnfs_layout_range *l1, - struct pnfs_layout_range *l2) +pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, + const struct pnfs_layout_range *l2) { s64 d; @@ -852,7 +1026,7 @@ pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); list_for_each_entry(lp, &lo->plh_segs, pls_list) { - if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) + if (pnfs_lseg_range_cmp(&lseg->pls_range, &lp->pls_range) > 0) continue; list_add_tail(&lseg->pls_list, &lp->pls_list); dprintk("%s: inserted lseg %p " @@ -888,9 +1062,9 @@ alloc_init_layout_hdr(struct inode *ino, atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); - INIT_LIST_HEAD(&lo->plh_bulk_recall); + INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; - lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); + lo->plh_lc_cred = get_rpccred(ctx->cred); return lo; } @@ -931,21 +1105,21 @@ out_existing: * READ READ true * READ RW true */ -static int -is_matching_lseg(struct pnfs_layout_range *ls_range, - struct pnfs_layout_range *range) +static bool +pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, + const struct pnfs_layout_range *range) { struct pnfs_layout_range range1; if ((range->iomode == IOMODE_RW && ls_range->iomode != IOMODE_RW) || - !lo_seg_intersecting(ls_range, range)) + !pnfs_lseg_range_intersecting(ls_range, range)) return 0; /* range1 covers only the first byte in the range */ range1 = *range; range1.length = 1; - return lo_seg_contained(ls_range, &range1); + return pnfs_lseg_range_contained(ls_range, &range1); } /* @@ -961,7 +1135,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && - is_matching_lseg(&lseg->pls_range, range)) { + pnfs_lseg_range_match(&lseg->pls_range, range)) { ret = pnfs_get_lseg(lseg); break; } @@ -1071,7 +1245,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; - bool first = false; + bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1105,10 +1279,9 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; atomic_inc(&lo->plh_outstanding); - if (list_empty(&lo->plh_segs)) - first = true; - + first = list_empty(&lo->plh_layouts) ? true : false; spin_unlock(&ino->i_lock); + if (first) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. @@ -1153,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; + LIST_HEAD(free_me); int status = 0; /* Inject layout blob into I/O device driver */ @@ -1179,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } + /* Check that the new stateid matches the old stateid */ + pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); /* Done processing layoutget. Set the layout stateid */ pnfs_set_layout_stateid(lo, &res->stateid, false); @@ -1193,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&free_me); return lseg; out: return ERR_PTR(status); @@ -1211,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_read_mds(pgio); - return; - } - if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else @@ -1240,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, { WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_write_mds(pgio); - return; - } - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), @@ -1257,68 +1424,63 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_read(pgio, inode, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); -} - -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, - int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); -} - -bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (pgio->pg_lseg == NULL) - return nfs_generic_pg_test(pgio, prev, req); + unsigned int size; + u64 seg_end, req_start, seg_left; + + size = nfs_generic_pg_test(pgio, prev, req); + if (!size) + return 0; /* - * Test if a nfs_page is fully contained in the pnfs_layout_range. - * Note that this test makes several assumptions: - * - that the previous nfs_page in the struct nfs_pageio_descriptor - * is known to lie within the range. - * - that the nfs_page being tested is known to be contiguous with the - * previous nfs_page. - * - Layout ranges are page aligned, so we only have to test the - * start offset of the request. + * 'size' contains the number of bytes left in the current page (up + * to the original size asked for in @req->wb_bytes). + * + * Calculate how many bytes are left in the layout segment + * and if there are less bytes than 'size', return that instead. * * Please also note that 'end_offset' is actually the offset of the * first byte that lies outside the pnfs_layout_range. FIXME? * */ - return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); + if (pgio->pg_lseg) { + seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + req_start = req_offset(req); + WARN_ON_ONCE(req_start > seg_end); + /* start of request is past the last byte of this segment */ + if (req_start >= seg_end) + return 0; + + /* adjust 'size' iff there are fewer bytes left in the + * segment than what nfs_generic_pg_test returned */ + seg_left = seg_end - req_start; + if (seg_left < size) + size = (unsigned int)seg_left; + } + + return size; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops) + const struct nfs_pgio_completion_ops *compl_ops, + struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); + pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1340,29 +1502,30 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops); + hdr->completion_ops, + hdr->dreq); } /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_write_data *data) +void pnfs_ld_write_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; + trace_nfs4_pnfs_write(data, hdr->pnfs_error); if (!hdr->pnfs_error) { pnfs_set_layoutcommit(data); hdr->mds_ops->rpc_call_done(&data->task, data); @@ -1374,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1383,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_writedata_release(data); + nfs_pgio_data_release(data); } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_data *wdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) @@ -1409,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, } static void -pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +pnfs_do_write(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_write_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); - } + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_writehdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); @@ -1454,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + pnfs_do_write(desc, hdr, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; @@ -1468,13 +1626,15 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops) + const struct nfs_pgio_completion_ops *compl_ops, + struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, true, compl_ops); + pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1492,29 +1652,30 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops); + hdr->completion_ops, + hdr->dreq); } /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_read_data *data) +void pnfs_ld_read_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; + trace_nfs4_pnfs_read(data, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(data); hdr->mds_ops->rpc_call_done(&data->task, data); @@ -1526,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1535,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_readdata_release(data); + nfs_pgio_data_release(data); } /* * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { @@ -1564,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, } static void -pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_read_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_read_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); - } + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_readhdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; @@ -1610,18 +1765,27 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_pagein(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_reads(desc, &hdr->rpc_list); + pnfs_do_read(desc, hdr); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); +static void pnfs_clear_layoutcommitting(struct inode *inode) +{ + unsigned long *bitlock = &NFS_I(inode)->flags; + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); +} + /* * There can be multiple RW segments. */ @@ -1631,11 +1795,24 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { if (lseg->pls_range.iomode == IOMODE_RW && - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) list_add(&lseg->pls_lc_list, listp); } } +static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) +{ + struct pnfs_layout_segment *lseg, *tmp; + + /* Matched by references in pnfs_set_layoutcommit */ + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + pnfs_put_lseg(lseg); + } + + pnfs_clear_layoutcommitting(inode); +} + void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); @@ -1643,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_write_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; @@ -1680,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) if (nfss->pnfs_curr_ld->cleanup_layoutcommit) nfss->pnfs_curr_ld->cleanup_layoutcommit(data); + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); } /* @@ -1696,43 +1874,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; - int status = 0; + int status; - dprintk("--> %s inode %lu\n", __func__, inode->i_ino); - - if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + if (!pnfs_layoutcommit_outstanding(inode)) return 0; - /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ - data = kzalloc(sizeof(*data), GFP_NOFS); - if (!data) { - status = -ENOMEM; - goto out; - } - - if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) - goto out_free; + dprintk("--> %s inode %lu\n", __func__, inode->i_ino); + status = -EAGAIN; if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { - if (!sync) { - status = -EAGAIN; - goto out_free; - } - status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, - nfs_wait_bit_killable, TASK_KILLABLE); + if (!sync) + goto out; + status = wait_on_bit_lock(&nfsi->flags, + NFS_INO_LAYOUTCOMMITTING, + nfs_wait_bit_killable, + TASK_KILLABLE); if (status) - goto out_free; + goto out; } - INIT_LIST_HEAD(&data->lseg_list); + status = -ENOMEM; + /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ + data = kzalloc(sizeof(*data), GFP_NOFS); + if (!data) + goto clear_layoutcommitting; + + status = 0; spin_lock(&inode->i_lock); - if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { - clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); - spin_unlock(&inode->i_lock); - wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); - goto out_free; - } + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + goto out_unlock; + INIT_LIST_HEAD(&data->lseg_list); pnfs_list_write_lseg(inode, &data->lseg_list); end_pos = nfsi->layout->plh_lwb; @@ -1755,8 +1927,11 @@ out: mark_inode_dirty_sync(inode); dprintk("<-- %s status %d\n", __func__, status); return status; -out_free: +out_unlock: + spin_unlock(&inode->i_lock); kfree(data); +clear_layoutcommitting: + pnfs_clear_layoutcommitting(inode); goto out; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index dbf7bba52da..4fb309a2b4c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); + enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type { struct pnfs_layout_hdr { atomic_t plh_refcount; struct list_head plh_layouts; /* other client layouts */ - struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ + struct list_head plh_bulk_destroy; struct list_head plh_segs; /* layout segments list */ nfs4_stateid plh_stateid; atomic_t plh_outstanding; /* number of RPCs out */ @@ -149,9 +149,10 @@ struct pnfs_device { struct nfs4_deviceid dev_id; unsigned int layout_type; unsigned int mincount; + unsigned int maxcount; /* gdia_maxcount */ struct page **pages; unsigned int pgbase; - unsigned int pglen; + unsigned int pglen; /* reply buffer length */ }; #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 @@ -170,7 +171,8 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, const struct nfs_fh *fh, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, - struct pnfs_device *dev); + struct pnfs_device *dev, + struct rpc_cred *cred); extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); @@ -178,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, - const struct nfs_pgio_completion_ops *); -void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, - int, const struct nfs_pgio_completion_ops *); - void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -190,12 +187,18 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); -bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); +int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, + struct nfs_fsid *fsid, + bool is_recall); +int pnfs_destroy_layouts_byclid(struct nfs_client *clp, + bool is_recall); void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -210,12 +213,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_write_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_write_data *); -void pnfs_ld_read_done(struct nfs_read_data *); +int pnfs_commit_and_return_layout(struct inode *); +void pnfs_ld_write_done(struct nfs_pgio_data *); +void pnfs_ld_read_done(struct nfs_pgio_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -225,9 +229,11 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops); + const struct nfs_pgio_completion_ops *compl_ops, + struct nfs_direct_req *dreq); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops); + const struct nfs_pgio_completion_ops *compl_ops, + struct nfs_direct_req *dreq); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ @@ -265,7 +271,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { atomic_inc(&lseg->pls_refcount); - smp_mb__after_atomic_inc(); + smp_mb__after_atomic(); } return lseg; } @@ -349,6 +355,15 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) PNFS_LAYOUTRET_ON_SETATTR; } +static inline bool +pnfs_layoutcommit_outstanding(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + return test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags) != 0 || + test_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags) != 0; +} + static inline int pnfs_return_layout(struct inode *ino) { struct nfs_inode *nfsi = NFS_I(ino); @@ -400,6 +415,11 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline int pnfs_commit_and_return_layout(struct inode *inode) +{ + return 0; +} + static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) { @@ -437,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_read(pgio, inode, compl_ops); -} - -static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) @@ -500,6 +508,13 @@ pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, return false; } +static inline bool +pnfs_layoutcommit_outstanding(struct inode *inode) +{ + return false; +} + + static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) { return NULL; diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index d35b62e83ea..6da209bd940 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->ld == ld && d->nfs_client == clp && !memcmp(&d->deviceid, id, sizeof(*id))) { if (atomic_read(&d->ref)) @@ -248,12 +247,11 @@ static void _deviceid_purge_client(const struct nfs_client *clp, long hash) { struct nfs4_deviceid_node *d; - struct hlist_node *n; HLIST_HEAD(tmp); spin_lock(&nfs4_deviceid_lock); rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node) if (d->nfs_client == clp && atomic_read(&d->ref)) { hlist_del_init_rcu(&d->node); hlist_add_head(&d->tmpnode, &tmp); @@ -291,12 +289,11 @@ void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) { struct nfs4_deviceid_node *d; - struct hlist_node *n; int i; rcu_read_lock(); for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ - hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node) if (d->nfs_client == clp) set_bit(NFS_DEVICEID_INVALID, &d->flags); } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f084dac948e..c171ce1a8a3 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -98,7 +98,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, */ static int nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, struct nfs4_label *label) { struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_GETATTR], @@ -146,7 +146,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, static int nfs_proc_lookup(struct inode *dir, struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + struct nfs4_label *label) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), @@ -234,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; int status = -ENOMEM; - dprintk("NFS call create %s\n", dentry->d_name.name); + dprintk("NFS call create %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; @@ -243,7 +244,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); nfs_free_createdata(data); out: dprintk("NFS reply create: %d\n", status); @@ -264,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode; int status = -ENOMEM; - dprintk("NFS call mknod %s\n", dentry->d_name.name); + dprintk("NFS call mknod %pd\n", dentry); mode = sattr->ia_mode; if (S_ISFIFO(mode)) { @@ -290,7 +291,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); nfs_free_createdata(data); out: dprintk("NFS reply mknod: %d\n", status); @@ -356,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, } static int -nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .old_name = old_name, - .new_dir = NFS_FH(new_dir), - .new_name = new_name, - }; - struct rpc_message msg = { - .rpc_proc = &nfs_procedures[NFSPROC_RENAME], - .rpc_argp = &arg, - }; - int status; - - dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_mark_for_revalidate(old_dir); - nfs_mark_for_revalidate(new_dir); - dprintk("NFS reply rename: %d\n", status); - return status; -} - -static int nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { struct nfs_linkargs arg = { @@ -422,7 +399,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, }; int status = -ENAMETOOLONG; - dprintk("NFS call symlink %s\n", dentry->d_name.name); + dprintk("NFS call symlink %pd\n", dentry); if (len > NFS2_MAXPATHLEN) goto out; @@ -442,7 +419,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, * should fill in the data with a LOOKUP call on the wire. */ if (status == 0) - status = nfs_instantiate(dentry, fh, fattr); + status = nfs_instantiate(dentry, fh, fattr, NULL); out_free: nfs_free_fattr(fattr); @@ -461,7 +438,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) }; int status = -ENOMEM; - dprintk("NFS call mkdir %s\n", dentry->d_name.name); + dprintk("NFS call mkdir %pd\n", dentry); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; @@ -471,7 +448,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); nfs_mark_for_revalidate(dir); if (status == 0) - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL); nfs_free_createdata(data); out: dprintk("NFS reply mkdir: %d\n", status); @@ -601,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -617,17 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); + return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -636,18 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - rpc_call_start(task); -} - static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { BUG(); @@ -662,7 +635,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) static int nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); } @@ -742,7 +715,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .unlink_setup = nfs_proc_unlink_setup, .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, .unlink_done = nfs_proc_unlink_done, - .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, .rename_done = nfs_proc_rename_done, @@ -756,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .fsinfo = nfs_proc_fsinfo, .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, + .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare, .read_setup = nfs_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, - .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, - .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a5e5d9899d5..e818a475ca6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -24,85 +24,24 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static const struct nfs_pageio_ops nfs_pageio_read_ops; -static const struct rpc_call_ops nfs_read_common_ops; static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; +static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc(void) +static struct nfs_rw_header *nfs_readhdr_alloc(void) { - struct nfs_read_header *rhdr; - - rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); - if (rhdr) { - struct nfs_pgio_header *hdr = &rhdr->header; - - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } - return rhdr; + return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static void nfs_readhdr_free(struct nfs_rw_header *rhdr) { - struct nfs_read_data *data, *prealloc; - - prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - -void nfs_readhdr_free(struct nfs_pgio_header *hdr) -{ - struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); - kmem_cache_free(nfs_rdata_cachep, rhdr); } -EXPORT_SYMBOL_GPL(nfs_readhdr_free); - -void nfs_readdata_release(struct nfs_read_data *rdata) -{ - struct nfs_pgio_header *hdr = rdata->header; - struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); - - put_nfs_open_context(rdata->args.context); - if (rdata->pages.pagevec != rdata->pages.page_array) - kfree(rdata->pages.pagevec); - if (rdata == &read_header->rpc_data) { - rdata->header = NULL; - rdata = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(rdata); -} -EXPORT_SYMBOL_GPL(nfs_readdata_release); static int nfs_return_empty_page(struct page *page) @@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page) } void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, - NFS_SERVER(inode)->rsize, 0); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_read_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, + server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { - pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); @@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, inode, page, 0, len); + new = nfs_create_request(ctx, page, NULL, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -158,20 +105,31 @@ static void nfs_readpage_release(struct nfs_page *req) { struct inode *d_inode = req->wb_context->dentry->d_inode; - if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + (long long)req_offset(req)); - unlock_page(req->wb_page); + if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); - dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", + unlock_page(req->wb_page); + } + + dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); nfs_release_request(req); } -/* Note io was page aligned */ +static void nfs_page_group_set_uptodate(struct nfs_page *req) +{ + if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) + SetPageUptodate(req->wb_page); +} + static void nfs_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; + unsigned long start = req->wb_pgbase; + unsigned long end = req->wb_pgbase + req->wb_bytes; if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); + /* note: regions of the page not covered by a + * request are zeroed in nfs_readpage_async / + * readpage_async_filler */ + if (bytes > hdr->good_bytes) { + /* nothing in this request was good, so zero + * the full extent of the request */ + zero_user_segment(page, start, end); + + } else if (hdr->good_bytes - bytes < req->wb_bytes) { + /* part of this request has good bytes, but + * not all. zero the bad bytes */ + start += hdr->good_bytes - bytes; + WARN_ON(start < req->wb_pgbase); + zero_user_segment(page, start, end); + } } bytes += req->wb_bytes; if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); } else - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); nfs_list_remove_request(req); nfs_readpage_release(req); } @@ -203,95 +172,14 @@ out: hdr->release(hdr); } -int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, int flags) +static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = clnt, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | swap_flags | flags, - }; - - /* Set up the initial task struct. */ - NFS_PROTO(inode)->read_setup(data, &msg); - - dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ " - "offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - rpc_put_task(task); - return 0; -} -EXPORT_SYMBOL_GPL(nfs_initiate_read); -/* - * Set up the NFS read request struct - */ -static void nfs_read_rpcsetup(struct nfs_read_data *data, - unsigned int count, unsigned int offset) -{ - struct nfs_page *req = data->header->req; - - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - nfs_fattr_init(&data->fattr); -} - -static int nfs_do_read(struct nfs_read_data *data, - const struct rpc_call_ops *call_ops) -{ - struct inode *inode = data->header->inode; - - return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); -} - -static int -nfs_do_multiple_reads(struct list_head *head, - const struct rpc_call_ops *call_ops) -{ - struct nfs_read_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_read_data, list); - list_del_init(&data->list); - - ret2 = nfs_do_read(data, call_ops); - if (ret == 0) - ret = ret2; - } - return ret; + task_setup_data->flags |= swap_flags; + NFS_PROTO(inode)->read_setup(data, msg); } static void @@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; -static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_read_data, list); - list_del(&data->list); - nfs_readdata_release(data); - } - desc->pg_completion_ops->error_cleanup(&desc->pg_list); -} - -/* - * Generate multiple requests to fill a single page. - * - * We optimize to reduce the number of read operations on the wire. If we - * detect that we're reading a page, or an area of a page, that is past the - * end of file, we do not generate NFS read operations but just clear the - * parts of the page that would have come back zero from the server anyway. - * - * We rely on the cached value of i_size to make this determination; another - * client can fill pages on the server past our cached end-of-file, but we - * won't see the new data until our attribute cache is updated. This is more - * or less conventional NFS client behavior. - */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_read_data *data; - size_t rsize = desc->pg_bsize, nbytes; - unsigned int offset; - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes,rsize); - - data = nfs_readdata_alloc(hdr, 1); - if (!data) { - nfs_pagein_error(desc, hdr); - return -ENOMEM; - } - data->pages.pagevec[0] = page; - nfs_read_rpcsetup(data, len, offset); - list_add(&data->list, &hdr->rpc_list); - nbytes -= len; - offset += len; - } while (nbytes != 0); - - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_read_common_ops; - return 0; -} - -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req; - struct page **pages; - struct nfs_read_data *data; - struct list_head *head = &desc->pg_list; - - data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { - nfs_pagein_error(desc, hdr); - return -ENOMEM; - } - - pages = data->pages.pagevec; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; - } - - nfs_read_rpcsetup(data, desc->pg_count, 0); - list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_read_common_ops; - return 0; -} - -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc, hdr); - return nfs_pagein_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_pagein); - -static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) -{ - struct nfs_read_header *rhdr; - struct nfs_pgio_header *hdr; - int ret; - - rhdr = nfs_readhdr_alloc(); - if (!rhdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); - return -ENOMEM; - } - hdr = &rhdr->header; - nfs_pgheader_init(desc, hdr, nfs_readhdr_free); - atomic_inc(&hdr->refcnt); - ret = nfs_generic_pagein(desc, hdr); - if (ret == 0) - ret = nfs_do_multiple_reads(&hdr->rpc_list, - desc->pg_rpc_callops); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - return ret; -} - -static const struct nfs_pageio_ops nfs_pageio_read_ops = { - .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_readpages, -}; - /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct inode *inode = data->header->inode; - int status; - - dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, - task->tk_status); - - status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, data); if (status != 0) return status; @@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) { - struct nfs_readargs *argp = &data->args; - struct nfs_readres *resp = &data->res; + struct nfs_pgio_args *argp = &data->args; + struct nfs_pgio_res *resp = &data->res; /* This is a short read! */ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); @@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data rpc_restart_call_prepare(task); } -static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) +static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) { - struct nfs_read_data *data = calldata; struct nfs_pgio_header *hdr = data->header; - /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ - if (nfs_readpage_result(task, data) != 0) - return; - if (task->tk_status < 0) - nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); - else if (data->res.eof) { + if (data->res.eof) { loff_t bound; bound = data->args.offset + data->res.count; @@ -505,23 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) nfs_readpage_retry(task, data); } -static void nfs_readpage_release_common(void *calldata) -{ - nfs_readdata_release(calldata); -} - -void nfs_read_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_read_data *data = calldata; - NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); -} - -static const struct rpc_call_ops nfs_read_common_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_readpage_result_common, - .rpc_release = nfs_readpage_release_common, -}; - /* * Read a page over NFS. * We read the page synchronously in the following case: @@ -589,7 +325,6 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; @@ -598,7 +333,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, inode, page, 0, len); + new = nfs_create_request(desc->ctx, page, NULL, 0, len); if (IS_ERR(new)) goto out_error; @@ -627,9 +362,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, unsigned long npages; int ret = -ESTALE; - dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", + dprintk("NFS: nfs_readpages (%s/%Lu %d)\n", inode->i_sb->s_id, - (long long)NFS_FILEID(inode), + (unsigned long long)NFS_FILEID(inode), nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); @@ -651,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); @@ -668,7 +404,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_read_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) @@ -681,3 +417,12 @@ void nfs_destroy_readpagecache(void) { kmem_cache_destroy(nfs_rdata_cachep); } + +static const struct nfs_rw_ops nfs_rw_read_ops = { + .rw_mode = FMODE_READ, + .rw_alloc_header = nfs_readhdr_alloc, + .rw_free_header = nfs_readhdr_free, + .rw_done = nfs_readpage_done, + .rw_result = nfs_readpage_result, + .rw_initiate = nfs_initiate_read, +}; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2e7e8c878e5..084af1060d7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -31,6 +31,7 @@ #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> @@ -54,7 +55,6 @@ #include <linux/parser.h> #include <linux/nsproxy.h> #include <linux/rcupdate.h> -#include <linux/kthread.h> #include <asm/uaccess.h> @@ -269,7 +269,7 @@ static match_table_t nfs_local_lock_tokens = { enum { Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, + Opt_vers_4_1, Opt_vers_4_2, Opt_vers_err }; @@ -280,6 +280,7 @@ static match_table_t nfs_vers_tokens = { { Opt_vers_4, "4" }, { Opt_vers_4_0, "4.0" }, { Opt_vers_4_1, "4.1" }, + { Opt_vers_4_2, "4.2" }, { Opt_vers_err, NULL } }; @@ -292,8 +293,9 @@ struct file_system_type nfs_fs_type = { .name = "nfs", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs"); EXPORT_SYMBOL_GPL(nfs_fs_type); struct file_system_type nfs_xdev_fs_type = { @@ -301,7 +303,7 @@ struct file_system_type nfs_xdev_fs_type = { .name = "nfs", .mount = nfs_xdev_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; const struct super_operations nfs_sops = { @@ -331,8 +333,10 @@ struct file_system_type nfs4_fs_type = { .name = "nfs4", .mount = nfs_fs_mount, .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, }; +MODULE_ALIAS_FS("nfs4"); +MODULE_ALIAS("nfs4"); EXPORT_SYMBOL_GPL(nfs4_fs_type); static int __init register_nfs4_fs(void) @@ -356,7 +360,8 @@ static void unregister_nfs4_fs(void) #endif static struct shrinker acl_shrinker = { - .shrink = nfs_access_cache_shrinker, + .count_objects = nfs_access_cache_count, + .scan_objects = nfs_access_cache_scan, .seeks = DEFAULT_SEEKS, }; @@ -418,54 +423,6 @@ void nfs_sb_deactive(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_sb_deactive); -static int nfs_deactivate_super_async_work(void *ptr) -{ - struct super_block *sb = ptr; - - deactivate_super(sb); - module_put_and_exit(0); - return 0; -} - -/* - * same effect as deactivate_super, but will do final unmount in kthread - * context - */ -static void nfs_deactivate_super_async(struct super_block *sb) -{ - struct task_struct *task; - char buf[INET6_ADDRSTRLEN + 1]; - struct nfs_server *server = NFS_SB(sb); - struct nfs_client *clp = server->nfs_client; - - if (!atomic_add_unless(&sb->s_active, -1, 1)) { - rcu_read_lock(); - snprintf(buf, sizeof(buf), - rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); - rcu_read_unlock(); - - __module_get(THIS_MODULE); - task = kthread_run(nfs_deactivate_super_async_work, sb, - "%s-deactivate-super", buf); - if (IS_ERR(task)) { - pr_err("%s: kthread_run: %ld\n", - __func__, PTR_ERR(task)); - /* make synchronous call and hope for the best */ - deactivate_super(sb); - module_put(THIS_MODULE); - } - } -} - -void nfs_sb_deactive_async(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - if (atomic_dec_and_test(&server->active)) - nfs_deactivate_super_async(sb); -} -EXPORT_SYMBOL_GPL(nfs_sb_deactive_async); - /* * Deliver file system statistics to userspace */ @@ -540,7 +497,8 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) static const struct { rpc_authflavor_t flavour; const char *str; - } sec_flavours[] = { + } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = { + /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */ { RPC_AUTH_NULL, "null" }, { RPC_AUTH_UNIX, "sys" }, { RPC_AUTH_GSS_KRB5, "krb5" }, @@ -877,6 +835,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); show_sessions(m, nfss); show_pnfs(m, nfss); @@ -965,8 +924,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->mount_server.port = NFS_UNSPEC_PORT; data->nfs_server.port = NFS_UNSPEC_PORT; data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->auth_flavors[0] = RPC_AUTH_UNIX; - data->auth_flavor_len = 1; + data->selected_flavor = RPC_AUTH_MAXFLAVOR; data->minorversion = 0; data->need_mount = true; data->net = current->nsproxy->net_ns; @@ -1062,55 +1020,108 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) } /* + * Add 'flavor' to 'auth_info' if not already present. + * Returns true if 'flavor' ends up in the list, false otherwise + */ +static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) +{ + unsigned int i; + unsigned int max_flavor_len = (sizeof(auth_info->flavors) / + sizeof(auth_info->flavors[0])); + + /* make sure this flavor isn't already in the list */ + for (i = 0; i < auth_info->flavor_len; i++) { + if (flavor == auth_info->flavors[i]) + return true; + } + + if (auth_info->flavor_len + 1 >= max_flavor_len) { + dfprintk(MOUNT, "NFS: too many sec= flavors\n"); + return false; + } + + auth_info->flavors[auth_info->flavor_len++] = flavor; + return true; +} + +/* + * Return true if 'match' is in auth_info or auth_info is empty. + * Return false otherwise. + */ +bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, + rpc_authflavor_t match) +{ + int i; + + if (!auth_info->flavor_len) + return true; + + for (i = 0; i < auth_info->flavor_len; i++) { + if (auth_info->flavors[i] == match) + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(nfs_auth_info_match); + +/* * Parse the value of the 'sec=' option. */ static int nfs_parse_security_flavors(char *value, struct nfs_parsed_mount_data *mnt) { substring_t args[MAX_OPT_ARGS]; + rpc_authflavor_t pseudoflavor; + char *p; dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - switch (match_token(value, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - mnt->auth_flavors[0] = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - mnt->auth_flavors[0] = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; - break; - default: - return 0; + while ((p = strsep(&value, ":")) != NULL) { + switch (match_token(p, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + dfprintk(MOUNT, + "NFS: sec= option '%s' not recognized\n", p); + return 0; + } + + if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) + return 0; } - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - mnt->auth_flavor_len = 1; return 1; } @@ -1142,6 +1153,10 @@ static int nfs_parse_version_string(char *string, mnt->version = 4; mnt->minorversion = 1; break; + case Opt_vers_4_2: + mnt->version = 4; + mnt->minorversion = 2; + break; default: return 0; } @@ -1599,7 +1614,7 @@ static int nfs_parse_mount_options(char *raw, goto out_minorversion_mismatch; if (mnt->options & NFS_OPTION_MIGRATION && - mnt->version != 4 && mnt->minorversion != 0) + (mnt->version != 4 || mnt->minorversion != 0)) goto out_migration_misuse; /* @@ -1653,49 +1668,40 @@ out_security_failure: } /* - * Match the requested auth flavors with the list returned by - * the server. Returns zero and sets the mount's authentication - * flavor on success; returns -EACCES if server does not support - * the requested flavor. + * Ensure that a specified authtype in args->auth_info is supported by + * the server. Returns 0 and sets args->selected_flavor if it's ok, and + * -EACCES if not. */ -static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, - struct nfs_mount_request *request) +static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, + rpc_authflavor_t *server_authlist, unsigned int count) { - unsigned int i, j, server_authlist_len = *(request->auth_flav_len); + rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; + unsigned int i; /* - * Certain releases of Linux's mountd return an empty - * flavor list. To prevent behavioral regression with - * these servers (ie. rejecting mounts that used to - * succeed), revert to pre-2.6.32 behavior (no checking) - * if the returned flavor list is empty. - */ - if (server_authlist_len == 0) - return 0; - - /* - * We avoid sophisticated negotiating here, as there are - * plenty of cases where we can get it wrong, providing - * either too little or too much security. + * If the sec= mount option is used, the specified flavor or AUTH_NULL + * must be in the list returned by the server. * - * RFC 2623, section 2.7 suggests we SHOULD prefer the - * flavor listed first. However, some servers list - * AUTH_NULL first. Our caller plants AUTH_SYS, the - * preferred default, in args->auth_flavors[0] if user - * didn't specify sec= mount option. + * AUTH_NULL has a special meaning when it's in the server list - it + * means that the server will ignore the rpc creds, so any flavor + * can be used. */ - for (i = 0; i < args->auth_flavor_len; i++) - for (j = 0; j < server_authlist_len; j++) - if (args->auth_flavors[i] == request->auth_flavs[j]) { - dfprintk(MOUNT, "NFS: using auth flavor %d\n", - request->auth_flavs[j]); - args->auth_flavors[0] = request->auth_flavs[j]; - return 0; - } + for (i = 0; i < count; i++) { + flavor = server_authlist[i]; + + if (nfs_auth_info_match(&args->auth_info, flavor) || + flavor == RPC_AUTH_NULL) + goto out; + } - dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); - nfs_umount(request); + dfprintk(MOUNT, + "NFS: specified auth flavors not supported by server\n"); return -EACCES; + +out: + args->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); + return 0; } /* @@ -1703,10 +1709,10 @@ static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, * corresponding to the provided path. */ static int nfs_request_mount(struct nfs_parsed_mount_data *args, - struct nfs_fh *root_fh) + struct nfs_fh *root_fh, + rpc_authflavor_t *server_authlist, + unsigned int *server_authlist_len) { - rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; - unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); struct nfs_mount_request request = { .sap = (struct sockaddr *) &args->mount_server.address, @@ -1714,7 +1720,7 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, .protocol = args->mount_server.protocol, .fh = root_fh, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, - .auth_flav_len = &server_authlist_len, + .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, .net = args->net, }; @@ -1758,29 +1764,93 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return status; } + return 0; +} + +static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, + struct nfs_subversion *nfs_mod) +{ + int status; + unsigned int i; + bool tried_auth_unix = false; + bool auth_null_in_list = false; + struct nfs_server *server = ERR_PTR(-EACCES); + struct nfs_parsed_mount_data *args = mount_info->parsed; + rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; + unsigned int authlist_len = ARRAY_SIZE(authlist); + + status = nfs_request_mount(args, mount_info->mntfh, authlist, + &authlist_len); + if (status) + return ERR_PTR(status); + /* - * MNTv1 (NFSv2) does not support auth flavor negotiation. + * Was a sec= authflavor specified in the options? First, verify + * whether the server supports it, and then just try to use it if so. */ - if (args->mount_server.version != NFS_MNT3_VERSION) - return 0; - return nfs_walk_authlist(args, &request); + if (args->auth_info.flavor_len > 0) { + status = nfs_verify_authflavors(args, authlist, authlist_len); + dfprintk(MOUNT, "NFS: using auth flavor %u\n", + args->selected_flavor); + if (status) + return ERR_PTR(status); + return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + } + + /* + * No sec= option was provided. RFC 2623, section 2.7 suggests we + * SHOULD prefer the flavor listed first. However, some servers list + * AUTH_NULL first. Avoid ever choosing AUTH_NULL. + */ + for (i = 0; i < authlist_len; ++i) { + rpc_authflavor_t flavor; + struct rpcsec_gss_info info; + + flavor = authlist[i]; + switch (flavor) { + case RPC_AUTH_UNIX: + tried_auth_unix = true; + break; + case RPC_AUTH_NULL: + auth_null_in_list = true; + continue; + default: + if (rpcauth_get_gssinfo(flavor, &info) != 0) + continue; + /* Fallthrough */ + } + dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); + args->selected_flavor = flavor; + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + if (!IS_ERR(server)) + return server; + } + + /* + * Nothing we tried so far worked. At this point, give up if we've + * already tried AUTH_UNIX or if the server's list doesn't contain + * AUTH_NULL + */ + if (tried_auth_unix || !auth_null_in_list) + return server; + + /* Last chance! Try AUTH_UNIX */ + dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); + args->selected_flavor = RPC_AUTH_UNIX; + return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); } struct dentry *nfs_try_mount(int flags, const char *dev_name, struct nfs_mount_info *mount_info, struct nfs_subversion *nfs_mod) { - int status; struct nfs_server *server; - if (mount_info->parsed->need_mount) { - status = nfs_request_mount(mount_info->parsed, mount_info->mntfh); - if (status) - return ERR_PTR(status); - } + if (mount_info->parsed->need_mount) + server = nfs_try_mount_request(mount_info, nfs_mod); + else + server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - /* Get a volume representation */ - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (IS_ERR(server)) return ERR_CAST(server); @@ -1883,6 +1953,7 @@ static int nfs23_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; + int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; if (data == NULL) goto out_no_data; @@ -1898,6 +1969,8 @@ static int nfs23_validate_mount_data(void *options, goto out_no_v3; data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + /* Turn off security negotiation */ + extra_flags |= NFS_MOUNT_SECFLAVOUR; case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; @@ -1925,7 +1998,7 @@ static int nfs23_validate_mount_data(void *options, * can deal with. */ args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= NFS_MOUNT_LEGACY_INTERFACE; + args->flags |= extra_flags; args->rsize = data->rsize; args->wsize = data->wsize; args->timeo = data->timeo; @@ -1950,7 +2023,9 @@ static int nfs23_validate_mount_data(void *options, args->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->auth_flavors[0] = data->pseudoflavor; + args->selected_flavor = data->pseudoflavor; + else + args->selected_flavor = RPC_AUTH_UNIX; if (!args->nfs_server.hostname) goto out_nomem; @@ -2073,6 +2148,8 @@ static int nfs_validate_text_mount_data(void *options, max_namelen = NFS4_MAXNAMLEN; max_pathlen = NFS4_MAXPATHLEN; nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; nfs4_validate_mount_flags(args); #else goto out_v4_not_compiled; @@ -2082,9 +2159,6 @@ static int nfs_validate_text_mount_data(void *options, nfs_set_port(sap, &args->nfs_server.port, port); - if (args->auth_flavor_len > 1) - goto out_bad_auth; - return nfs_parse_devname(dev_name, &args->nfs_server.hostname, max_namelen, @@ -2095,26 +2169,40 @@ static int nfs_validate_text_mount_data(void *options, out_v4_not_compiled: dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); return -EPROTONOSUPPORT; +#else +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; #endif /* !CONFIG_NFS_V4 */ out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; - -out_bad_auth: - dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n"); - return -EINVAL; } +#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ + | NFS_MOUNT_SECURE \ + | NFS_MOUNT_TCP \ + | NFS_MOUNT_VER3 \ + | NFS_MOUNT_KERBEROS \ + | NFS_MOUNT_NONLM \ + | NFS_MOUNT_BROKEN_SUID \ + | NFS_MOUNT_STRICTLOCK \ + | NFS_MOUNT_UNSHARED \ + | NFS_MOUNT_NORESVPORT \ + | NFS_MOUNT_LEGACY_INTERFACE) + static int nfs_compare_remount_data(struct nfs_server *nfss, struct nfs_parsed_mount_data *data) { - if (data->flags != nfss->flags || + if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || data->rsize != nfss->rsize || data->wsize != nfss->wsize || + data->version != nfss->nfs_client->rpc_ops->version || + data->minorversion != nfss->nfs_client->cl_minorversion || data->retrans != nfss->client->cl_timeout->to_retries || - data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || + data->selected_flavor != nfss->client->cl_auth->au_flavor || data->acregmin != nfss->acregmin / HZ || data->acregmax != nfss->acregmax / HZ || data->acdirmin != nfss->acdirmin / HZ || @@ -2139,6 +2227,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; u32 nfsvers = nfss->nfs_client->rpc_ops->version; + sync_filesystem(sb); + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which @@ -2159,7 +2249,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->rsize = nfss->rsize; data->wsize = nfss->wsize; data->retrans = nfss->client->cl_timeout->to_retries; - data->auth_flavors[0] = nfss->client->cl_auth->au_flavor; + data->selected_flavor = nfss->client->cl_auth->au_flavor; + data->auth_info = nfss->auth_info; data->acregmin = nfss->acregmin / HZ; data->acregmax = nfss->acregmax / HZ; data->acdirmin = nfss->acdirmin / HZ; @@ -2167,12 +2258,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; data->nfs_server.port = nfss->port; data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + data->version = nfsvers; + data->minorversion = nfss->nfs_client->cl_minorversion; + data->net = current->nsproxy->net_ns; memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, data->nfs_server.addrlen); /* overwrite those values with any that were specified */ - error = nfs_parse_mount_options((char *)options, data); - if (error < 0) + error = -EINVAL; + if (!nfs_parse_mount_options((char *)options, data)) goto out; /* @@ -2276,7 +2370,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->nfs_client != b->nfs_client) goto Ebusy; - if (a->flags != b->flags) + if ((a->flags ^ b->flags) & NFS_MOUNT_CMP_FLAGMASK) goto Ebusy; if (a->wsize != b->wsize) goto Ebusy; @@ -2290,7 +2384,8 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) + if (b->auth_info.flavor_len > 0 && + clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; Ebusy: @@ -2418,7 +2513,21 @@ static int nfs_bdi_register(struct nfs_server *server) int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) { - return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts); + int error; + unsigned long kflags = 0, kflags_out = 0; + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) + kflags |= SECURITY_LSM_NATIVE_LABELS; + + error = security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts, + kflags, &kflags_out); + if (error) + goto err; + + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && + !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) + NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; +err: + return error; } EXPORT_SYMBOL_GPL(nfs_set_sb_security); @@ -2426,10 +2535,9 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, struct nfs_mount_info *mount_info) { /* clone any lsm security options from the parent to the new sb */ - security_sb_clone_mnt_opts(mount_info->cloned->sb, s); if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) return -ESTALE; - return 0; + return security_sb_clone_mnt_opts(mount_info->cloned->sb, s); } EXPORT_SYMBOL_GPL(nfs_clone_sb_security); @@ -2454,6 +2562,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, if (server->flags & NFS_MOUNT_NOAC) sb_mntdata.mntflags |= MS_SYNCHRONOUS; + if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) + if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { @@ -2470,6 +2582,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, mntroot = ERR_PTR(error); goto error_splat_bdi; } + server->super = s; } if (!s->s_root) { @@ -2589,27 +2702,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot = ERR_PTR(-ENOMEM); struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; - int error; - dprintk("--> nfs_xdev_mount_common()\n"); + dprintk("--> nfs_xdev_mount()\n"); mount_info.mntfh = mount_info.cloned->fh; /* create a new volume representation */ server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - if (IS_ERR(server)) { - error = PTR_ERR(server); - goto out_err; - } - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); - dprintk("<-- nfs_xdev_mount_common() = 0\n"); -out: - return mntroot; + if (IS_ERR(server)) + mntroot = ERR_CAST(server); + else + mntroot = nfs_fs_mount_common(server, flags, + dev_name, &mount_info, nfs_mod); -out_err: - dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); - goto out; + dprintk("<-- nfs_xdev_mount() = %ld\n", + IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L); + return mntroot; } #if IS_ENABLED(CONFIG_NFS_V4) @@ -2650,13 +2759,16 @@ static int nfs4_validate_mount_data(void *options, args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { + rpc_authflavor_t pseudoflavor; if (data->auth_flavourlen > 1) goto out_inval_auth; - if (copy_from_user(&args->auth_flavors[0], + if (copy_from_user(&pseudoflavor, data->auth_flavours, - sizeof(args->auth_flavors[0]))) + sizeof(pseudoflavor))) return -EFAULT; - } + args->selected_flavor = pseudoflavor; + } else + args->selected_flavor = RPC_AUTH_UNIX; c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) @@ -2690,6 +2802,8 @@ static int nfs4_validate_mount_data(void *options, args->acdirmax = data->acdirmax; args->nfs_server.protocol = data->proto; nfs_validate_transport_protocol(args); + if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; break; default: @@ -2710,6 +2824,10 @@ out_inval_auth: out_no_address: dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); return -EINVAL; + +out_invalid_transport_udp: + dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); + return -EINVAL; } /* @@ -2725,6 +2843,7 @@ bool nfs4_disable_idmapping = true; unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; unsigned short send_implementation_id = 1; char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; +bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); EXPORT_SYMBOL_GPL(nfs_callback_tcpport); @@ -2733,6 +2852,7 @@ EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); EXPORT_SYMBOL_GPL(send_implementation_id); EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier); +EXPORT_SYMBOL_GPL(recover_lost_locks); #define NFS_CALLBACK_MAXPORTNR (65535U) @@ -2769,6 +2889,11 @@ module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, "Send implementation ID with NFSv4.1 exchange_id"); MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); -MODULE_ALIAS("nfs4"); + +module_param(recover_lost_locks, bool, 0644); +MODULE_PARM_DESC(recover_lost_locks, + "If the server reports that a lock might be lost, " + "try to recover it risking data corruption."); + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 6b3f2535a3e..bb6ed810fa6 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -13,7 +13,7 @@ static struct ctl_table_header *nfs_callback_sysctl_table; -static ctl_table nfs_cb_sysctls[] = { +static struct ctl_table nfs_cb_sysctls[] = { { .procname = "nfs_mountpoint_timeout", .data = &nfs_mountpoint_expiry_timeout, @@ -31,7 +31,7 @@ static ctl_table nfs_cb_sysctls[] = { { } }; -static ctl_table nfs_cb_sysctl_dir[] = { +static struct ctl_table nfs_cb_sysctl_dir[] = { { .procname = "nfs", .mode = 0555, @@ -40,7 +40,7 @@ static ctl_table nfs_cb_sysctl_dir[] = { { } }; -static ctl_table nfs_cb_sysctl_root[] = { +static struct ctl_table nfs_cb_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3f79c77153b..de54129336c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -14,12 +14,15 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/namei.h> +#include <linux/fsnotify.h> #include "internal.h" #include "nfs4_fs.h" #include "iostat.h" #include "delegation.h" +#include "nfstrace.h" + /** * nfs_free_unlinkdata - release data from a sillydelete operation. * @data: pointer to unlink structure. @@ -77,6 +80,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct nfs_unlinkdata *data = calldata; struct inode *dir = data->dir; + trace_nfs_sillyrename_unlink(data, task->tk_status); if (!NFS_PROTO(dir)->unlink_done(task, dir)) rpc_restart_call_prepare(task); } @@ -95,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); - nfs_sb_deactive_async(sb); + nfs_sb_deactive(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) @@ -204,6 +208,13 @@ out_free: return ret; } +void nfs_wait_on_sillyrename(struct dentry *dentry) +{ + struct nfs_inode *nfsi = NFS_I(dentry->d_inode); + + wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1); +} + void nfs_block_sillyrename(struct dentry *dentry) { struct nfs_inode *nfsi = NFS_I(dentry->d_inode); @@ -268,8 +279,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) * point dentry is definitely not a root, so we won't need * that anymore. */ - if (devname_garbage) - kfree(devname_garbage); + kfree(devname_garbage); return 0; out_unlock: spin_unlock(&dentry->d_lock); @@ -336,20 +346,16 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; - struct dentry *new_dentry = data->new_dentry; + trace_nfs_sillyrename_rename(old_dir, old_dentry, + new_dir, data->new_dentry, task->tk_status); if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); return; } - if (task->tk_status != 0) { - nfs_cancel_async_unlink(old_dentry); - return; - } - - d_drop(old_dentry); - d_drop(new_dentry); + if (data->complete) + data->complete(task, data); } /** @@ -394,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = { * * It's expected that valid references to the dentries and inodes are held */ -static struct rpc_task * +struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, - struct dentry *old_dentry, struct dentry *new_dentry) + struct dentry *old_dentry, struct dentry *new_dentry, + void (*complete)(struct rpc_task *, struct nfs_renamedata *)) { struct nfs_renamedata *data; struct rpc_message msg = { }; @@ -433,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, data->new_dentry = dget(new_dentry); nfs_fattr_init(&data->old_fattr); nfs_fattr_init(&data->new_fattr); + data->complete = complete; /* set up nfs_renameargs */ data->args.old_dir = NFS_FH(old_dir); @@ -451,6 +459,35 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, return rpc_run_task(&task_setup_data); } +/* + * Perform tasks needed when a sillyrename is done such as cancelling the + * queued async unlink if it failed. + */ +static void +nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) +{ + struct dentry *dentry = data->old_dentry; + + if (task->tk_status != 0) { + nfs_cancel_async_unlink(dentry); + return; + } + + /* + * vfs_unlink and the like do not issue this when a file is + * sillyrenamed, so do it here. + */ + fsnotify_nameremove(dentry, 0); +} + +#define SILLYNAME_PREFIX ".nfs" +#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) +#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) +#define SILLYNAME_COUNTER_LEN ((unsigned)sizeof(unsigned int) << 1) +#define SILLYNAME_LEN (SILLYNAME_PREFIX_LEN + \ + SILLYNAME_FILEID_LEN + \ + SILLYNAME_COUNTER_LEN) + /** * nfs_sillyrename - Perform a silly-rename of a dentry * @dir: inode of directory that contains dentry @@ -476,43 +513,39 @@ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; - const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; - char silly[slen+1]; + unsigned char silly[SILLYNAME_LEN + 1]; + unsigned long long fileid; struct dentry *sdentry; struct rpc_task *task; - int error = -EIO; + int error = -EBUSY; - dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - dentry->d_count); + dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n", + dentry, d_count(dentry)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* * We don't allow a dentry to be silly-renamed twice. */ - error = -EBUSY; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); + fileid = NFS_FILEID(dentry->d_inode); /* Return delegation in anticipation of the rename */ NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode); sdentry = NULL; do { - char *suffix = silly + slen - countersize; - + int slen; dput(sdentry); sillycounter++; - sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); + slen = scnprintf(silly, sizeof(silly), + SILLYNAME_PREFIX "%0*llx%0*x", + SILLYNAME_FILEID_LEN, fileid, + SILLYNAME_COUNTER_LEN, sillycounter); - dfprintk(VFS, "NFS: trying to rename %s to %s\n", - dentry->d_name.name, silly); + dfprintk(VFS, "NFS: trying to rename %pd to %s\n", + dentry, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* @@ -539,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) } /* run the rename task, undo unlink if it fails */ - task = nfs_async_rename(dir, dir, dentry, sdentry); + task = nfs_async_rename(dir, dir, dentry, sdentry, + nfs_complete_sillyrename); if (IS_ERR(task)) { error = -EBUSY; nfs_cancel_async_unlink(dentry); @@ -550,6 +584,18 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) error = rpc_wait_for_completion_task(task); if (error == 0) error = task->tk_status; + switch (error) { + case 0: + /* The rename succeeded */ + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + break; + case -ERESTARTSYS: + /* The result of the rename is unknown. Play it safe by + * forcing a new lookup */ + d_drop(dentry); + d_drop(sdentry); + } rpc_put_task(task); out_dput: dput(sdentry); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c483cc50b82..5e2f1030454 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -31,6 +31,8 @@ #include "fscache.h" #include "pnfs.h" +#include "nfstrace.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE #define MIN_POOL_WRITE (32) @@ -40,10 +42,11 @@ * Local function declarations */ static void nfs_redirty_request(struct nfs_page *req); -static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; +static const struct nfs_rw_ops nfs_rw_write_ops; +static void nfs_clear_request_commit(struct nfs_page *req); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -68,76 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(void) +static struct nfs_rw_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - - if (p) { - struct nfs_pgio_header *hdr = &p->header; + struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + if (p) memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - hdr->verf = &p->verf; - } return p; } -EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static void nfs_writehdr_free(struct nfs_rw_header *whdr) { - struct nfs_write_data *data, *prealloc; - - prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - -void nfs_writehdr_free(struct nfs_pgio_header *hdr) -{ - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); mempool_free(whdr, nfs_wdata_mempool); } -EXPORT_SYMBOL_GPL(nfs_writehdr_free); - -void nfs_writedata_release(struct nfs_write_data *wdata) -{ - struct nfs_pgio_header *hdr = wdata->header; - struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); - - put_nfs_open_context(wdata->args.context); - if (wdata->pages.pagevec != wdata->pages.page_array) - kfree(wdata->pages.pagevec); - if (wdata == &write_header->rpc_data) { - wdata->header = NULL; - wdata = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(wdata); -} -EXPORT_SYMBOL_GPL(nfs_writedata_release); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -146,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } +/* + * nfs_page_find_head_request_locked - find head request associated with @page + * + * must be called while holding the inode lock. + * + * returns matching head request with reference held, or NULL if not found. + */ static struct nfs_page * -nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) +nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; @@ -159,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) /* Linearly search the commit list for the correct req */ list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { if (freq->wb_page == page) { - req = freq; + req = freq->wb_head; break; } } } - if (req) + if (req) { + WARN_ON_ONCE(req->wb_head != req); + kref_get(&req->wb_kref); + } return req; } -static struct nfs_page *nfs_page_find_request(struct page *page) +/* + * nfs_page_find_head_request - find head request associated with @page + * + * returns matching head request with reference held, or NULL if not found. + */ +static struct nfs_page *nfs_page_find_head_request(struct page *page) { struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -209,18 +170,78 @@ static void nfs_set_pageerror(struct page *page) nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } +/* + * nfs_page_group_search_locked + * @head - head request of page group + * @page_offset - offset into page + * + * Search page group with head @head to find a request that contains the + * page offset @page_offset. + * + * Returns a pointer to the first matching nfs request, or NULL if no + * match is found. + * + * Must be called with the page group lock held + */ +static struct nfs_page * +nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) +{ + struct nfs_page *req; + + WARN_ON_ONCE(head != head->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); + + req = head; + do { + if (page_offset >= req->wb_pgbase && + page_offset < (req->wb_pgbase + req->wb_bytes)) + return req; + + req = req->wb_this_page; + } while (req != head); + + return NULL; +} + +/* + * nfs_page_group_covers_page + * @head - head request of page group + * + * Return true if the page group with head @head covers the whole page, + * returns false otherwise + */ +static bool nfs_page_group_covers_page(struct nfs_page *req) +{ + struct nfs_page *tmp; + unsigned int pos = 0; + unsigned int len = nfs_page_length(req->wb_page); + + nfs_page_group_lock(req); + + do { + tmp = nfs_page_group_search_locked(req->wb_head, pos); + if (tmp) { + /* no way this should happen */ + WARN_ON_ONCE(tmp->wb_pgbase != pos); + pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); + } + } while (tmp && pos < len); + + nfs_page_group_unlock(req); + WARN_ON_ONCE(pos > len); + return pos == len; +} + /* We can set the PG_uptodate flag if we see that a write request * covers the full page. */ -static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +static void nfs_mark_uptodate(struct nfs_page *req) { - if (PageUptodate(page)) - return; - if (base != 0) + if (PageUptodate(req->wb_page)) return; - if (count != nfs_page_length(page)) + if (!nfs_page_group_covers_page(req)) return; - SetPageUptodate(page); + SetPageUptodate(req->wb_page); } static int wb_priority(struct writeback_control *wbc) @@ -256,46 +277,259 @@ static void nfs_set_page_writeback(struct page *page) } } -static void nfs_end_page_writeback(struct page *page) +static void nfs_end_page_writeback(struct nfs_page *req) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = page_file_mapping(req->wb_page)->host; struct nfs_server *nfss = NFS_SERVER(inode); - end_page_writeback(page); + if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) + return; + + end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) + +/* nfs_page_group_clear_bits + * @req - an nfs request + * clears all page group related bits from @req + */ +static void +nfs_page_group_clear_bits(struct nfs_page *req) +{ + clear_bit(PG_TEARDOWN, &req->wb_flags); + clear_bit(PG_UNLOCKPAGE, &req->wb_flags); + clear_bit(PG_UPTODATE, &req->wb_flags); + clear_bit(PG_WB_END, &req->wb_flags); + clear_bit(PG_REMOVE, &req->wb_flags); +} + + +/* + * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req + * + * this is a helper function for nfs_lock_and_join_requests + * + * @inode - inode associated with request page group, must be holding inode lock + * @head - head request of page group, must be holding head lock + * @req - request that couldn't lock and needs to wait on the req bit lock + * @nonblock - if true, don't actually wait + * + * NOTE: this must be called holding page_group bit lock and inode spin lock + * and BOTH will be released before returning. + * + * returns 0 on success, < 0 on error. + */ +static int +nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, + struct nfs_page *req, bool nonblock) + __releases(&inode->i_lock) +{ + struct nfs_page *tmp; + int ret; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) + nfs_unlock_request(tmp); + + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + + /* grab a ref on the request that will be waited on */ + kref_get(&req->wb_kref); + + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + + /* release ref from nfs_page_find_head_request_locked */ + nfs_release_request(head); + + if (!nonblock) + ret = nfs_wait_on_request(req); + else + ret = -EAGAIN; + nfs_release_request(req); + + return ret; +} + +/* + * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests + * + * @destroy_list - request list (using wb_this_page) terminated by @old_head + * @old_head - the old head of the list + * + * All subrequests must be locked and removed from all lists, so at this point + * they are only "active" in this function, and possibly in nfs_wait_on_request + * with a reference held by some other context. + */ +static void +nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, + struct nfs_page *old_head) +{ + while (destroy_list) { + struct nfs_page *subreq = destroy_list; + + destroy_list = (subreq->wb_this_page == old_head) ? + NULL : subreq->wb_this_page; + + WARN_ON_ONCE(old_head != subreq->wb_head); + + /* make sure old group is not used */ + subreq->wb_head = subreq; + subreq->wb_this_page = subreq; + + nfs_clear_request_commit(subreq); + + /* subreq is now totally disconnected from page group or any + * write / commit lists. last chance to wake any waiters */ + nfs_unlock_request(subreq); + + if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { + /* release ref on old head request */ + nfs_release_request(old_head); + + nfs_page_group_clear_bits(subreq); + + /* release the PG_INODE_REF reference */ + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) + nfs_release_request(subreq); + else + WARN_ON_ONCE(1); + } else { + WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); + /* zombie requests have already released the last + * reference and were waiting on the rest of the + * group to complete. Since it's no longer part of a + * group, simply free the request */ + nfs_page_group_clear_bits(subreq); + nfs_free_request(subreq); + } + } +} + +/* + * nfs_lock_and_join_requests - join all subreqs to the head req and return + * a locked reference, cancelling any pending + * operations for this page. + * + * @page - the page used to lookup the "page group" of nfs_page structures + * @nonblock - if true, don't block waiting for request locks + * + * This function joins all sub requests to the head request by first + * locking all requests in the group, cancelling any pending operations + * and finally updating the head request to cover the whole range covered by + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + * + * Returns a locked, referenced pointer to the head request - which after + * this call is guaranteed to be the only request associated with the page. + * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * error was encountered. + */ +static struct nfs_page * +nfs_lock_and_join_requests(struct page *page, bool nonblock) { struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *req; + struct nfs_page *head, *subreq; + struct nfs_page *destroy_list = NULL; + unsigned int total_bytes; int ret; +try_again: + total_bytes = 0; + + WARN_ON_ONCE(destroy_list); + spin_lock(&inode->i_lock); - for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); - if (req == NULL) - break; - if (nfs_lock_request(req)) - break; - /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request() will always - * succeed provided that someone hasn't already marked the - * request as dirty (in which case we don't care). - */ + + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed + * until the head reference is released. + */ + head = nfs_page_find_head_request_locked(NFS_I(inode), page); + + if (!head) { spin_unlock(&inode->i_lock); - if (!nonblock) - ret = nfs_wait_on_request(req); - else - ret = -EAGAIN; - nfs_release_request(req); - if (ret != 0) + return NULL; + } + + /* lock each request in the page group */ + nfs_page_group_lock(head); + subreq = head; + do { + /* + * Subrequests are always contiguous, non overlapping + * and in order. If not, it's a programming error. + */ + WARN_ON_ONCE(subreq->wb_offset != + (head->wb_offset + total_bytes)); + + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + + if (!nfs_lock_request(subreq)) { + /* releases page group bit lock and + * inode spin lock and all references */ + ret = nfs_unroll_locks_and_wait(inode, head, + subreq, nonblock); + + if (ret == 0) + goto try_again; + return ERR_PTR(ret); - spin_lock(&inode->i_lock); + } + + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* Now that all requests are locked, make sure they aren't on any list. + * Commit list removal accounting is done after locks are dropped */ + subreq = head; + do { + nfs_list_remove_request(subreq); + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* unlink subrequests from head, destroy them later */ + if (head->wb_this_page != head) { + /* destroy list will be terminated by head */ + destroy_list = head->wb_this_page; + head->wb_this_page = head; + + /* change head request to cover whole range that + * the former page group covered */ + head->wb_bytes = total_bytes; } + + /* + * prepare head request to be added to new pgio descriptor + */ + nfs_page_group_clear_bits(head); + + /* + * some part of the group was still on the inode list - otherwise + * the group wouldn't be involved in async write. + * grab a reference for the head request, iff it needs one. + */ + if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) + kref_get(&head->wb_kref); + + nfs_page_group_unlock(head); + + /* drop lock to clear_request_commit the head req and clean up + * requests on destroy list */ spin_unlock(&inode->i_lock); - return req; + + nfs_destroy_unlinked_subrequests(destroy_list, head); + + /* clean up commit list state */ + nfs_clear_request_commit(head); + + /* still holds ref on head from nfs_page_find_head_request_locked + * and still has lock on head from lock loop */ + return head; } /* @@ -308,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct nfs_page *req; int ret = 0; - req = nfs_find_and_lock_request(page, nonblock); + req = nfs_lock_and_join_requests(page, nonblock); if (!req) goto out; ret = PTR_ERR(req); @@ -352,10 +586,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, - page->mapping->host, - wb_priority(wbc), - &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + false, &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -398,12 +630,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); clear_bit_unlock(NFS_INO_FLUSHING, bitlock); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(bitlock, NFS_INO_FLUSHING); if (err < 0) @@ -423,6 +656,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); + WARN_ON_ONCE(req->wb_this_page != req); + /* Lock the request! */ nfs_lock_request(req); @@ -439,6 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; + /* this a head request for a page group - mark it as having an + * extra reference so sub groups can follow suit */ + WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -450,16 +688,23 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *head; - spin_lock(&inode->i_lock); - if (likely(!PageSwapCache(req->wb_page))) { - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + head = req->wb_head; + + spin_lock(&inode->i_lock); + if (likely(!PageSwapCache(head->wb_page))) { + set_page_private(head->wb_page, 0); + ClearPagePrivate(head->wb_page); + clear_bit(PG_MAPPED, &head->wb_flags); + } + nfsi->npages--; + spin_unlock(&inode->i_lock); } - nfsi->npages--; - spin_unlock(&inode->i_lock); - nfs_release_request(req); + + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + nfs_release_request(req); } static void @@ -581,7 +826,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { if (data->verf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; @@ -612,7 +857,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { return 0; } @@ -643,7 +888,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -651,7 +896,7 @@ remove_req: nfs_inode_remove_request(req); next: nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); nfs_release_request(req); } out: @@ -659,7 +904,7 @@ out: } #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) -static unsigned long +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return cinfo->mds->ncommit; @@ -716,7 +961,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, } #else -static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return 0; } @@ -752,10 +997,14 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; + /* should be handled by nfs_flush_incompatible */ + WARN_ON_ONCE(req->wb_head != req); + WARN_ON_ONCE(req->wb_this_page != req); + rqend = req->wb_offset + req->wb_bytes; /* * Tell the caller to flush out the request if @@ -817,7 +1066,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, inode, page, offset, bytes); + req = nfs_create_request(ctx, page, NULL, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); @@ -835,7 +1084,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, return PTR_ERR(req); /* Update file length */ nfs_grow_file(page, offset, count); - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_uptodate(req); nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); return 0; @@ -856,12 +1105,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * dropped page. */ do { - req = nfs_page_find_request(page); + req = nfs_page_find_head_request(page); if (req == NULL) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || req->wb_context != ctx; - if (l_ctx) { + /* for now, flush if more than 1 request in page_group */ + do_flush |= req->wb_this_page != req; + if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; } @@ -874,20 +1125,77 @@ int nfs_flush_incompatible(struct file *file, struct page *page) } /* + * Avoid buffered writes when a open context credential's key would + * expire soon. + * + * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL. + * + * Return 0 and set a credential flag which triggers the inode to flush + * and performs NFS_FILE_SYNC writes if the key will expired within + * RPC_KEY_EXPIRE_TIMEO. + */ +int +nfs_key_timeout_notify(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx = nfs_file_open_context(filp); + struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + + return rpcauth_key_timeout_notify(auth, ctx->cred); +} + +/* + * Test if the open context credential key is marked to expire soon. + */ +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx) +{ + return rpcauth_cred_key_to_expire(ctx->cred); +} + +/* * If the page cache is marked as unsafe or invalid, then we can't rely on * the PageUptodate() flag. In this case, we will need to turn off * write optimisations that depend on the page contents being correct. */ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { + struct nfs_inode *nfsi = NFS_I(inode); + if (nfs_have_delegated_attributes(inode)) goto out; - if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) + if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) + return false; + smp_rmb(); + if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) return false; out: + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + return false; return PageUptodate(page) != 0; } +/* If we know the page is up to date, and we're not using byte range locks (or + * if we have the whole file locked for writing), it may be more efficient to + * extend the write to cover the entire page in order to avoid fragmentation + * inefficiencies. + * + * If the file is opened for synchronous writes then we can just skip the rest + * of the checks. + */ +static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) +{ + if (file->f_flags & O_DSYNC) + return 0; + if (!nfs_write_pageuptodate(page, inode)) + return 0; + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + return 1; + if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 && + inode->i_flock->fl_end == OFFSET_MAX && + inode->i_flock->fl_type != F_RDLCK)) + return 1; + return 0; +} + /* * Update and possibly write a cached page of an NFS file. * @@ -903,19 +1211,10 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, count, - (long long)(page_file_offset(page) + offset)); + dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", + file, count, (long long)(page_file_offset(page) + offset)); - /* If we're not using byte range locks, and we know the page - * is up to date, it may be more efficient to extend the write - * to cover the entire page in order to avoid fragmentation - * inefficiencies. - */ - if (nfs_write_pageuptodate(page, inode) && - inode->i_flock == NULL && - !(file->f_flags & O_DSYNC)) { + if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } @@ -942,123 +1241,17 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags) +static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int priority = flush_task_priority(how); - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = clnt, - .task = &data->task, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | flags, - .priority = priority, - }; - int ret = 0; - /* Set up the initial task struct. */ - NFS_PROTO(inode)->write_setup(data, &msg); - - dprintk("NFS: %5u initiated write call " - "(req %s/%lld, %u bytes @ offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); + task_setup_data->priority = priority; + NFS_PROTO(inode)->write_setup(data, msg); - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } - rpc_put_task(task); -out: - return ret; -} -EXPORT_SYMBOL_GPL(nfs_initiate_write); - -/* - * Set up the argument/result storage required for the RPC call. - */ -static void nfs_write_rpcsetup(struct nfs_write_data *data, - unsigned int count, unsigned int offset, - int how, struct nfs_commit_info *cinfo) -{ - struct nfs_page *req = data->header->req; - - /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. */ - - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; - /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; - switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - case 0: - break; - case FLUSH_COND_STABLE: - if (nfs_reqs_to_commit(cinfo)) - break; - default: - data->args.stable = NFS_FILE_SYNC; - } - - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); -} - -static int nfs_do_write(struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, - int how) -{ - struct inode *inode = data->header->inode; - - return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); -} - -static int nfs_do_multiple_writes(struct list_head *head, - const struct rpc_call_ops *call_ops, - int how) -{ - struct nfs_write_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_write_data, list); - list_del_init(&data->list); - - ret2 = nfs_do_write(data, call_ops, how); - if (ret == 0) - ret = ret2; - } - return ret; + nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, + &task_setup_data->rpc_client, msg, data); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1069,7 +1262,7 @@ static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); nfs_release_request(req); } @@ -1089,170 +1282,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static void nfs_flush_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_write_data, list); - list_del(&data->list); - nfs_writedata_release(data); - } - desc->pg_completion_ops->error_cleanup(&desc->pg_list); -} - -/* - * Generate multiple small requests to write out a single - * contiguous dirty area on one page. - */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_write_data *data; - size_t wsize = desc->pg_bsize, nbytes; - unsigned int offset; - int requests = 0; - struct nfs_commit_info cinfo; - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || - desc->pg_count > wsize)) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes, wsize); - - data = nfs_writedata_alloc(hdr, 1); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } - data->pages.pagevec[0] = page; - nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - requests++; - nbytes -= len; - offset += len; - } while (nbytes != 0); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_write_common_ops; - return 0; -} - -/* - * Create an RPC task for the given write request and kick it. - * The page must have been locked by the caller. - * - * It may happen that the page we're passed is not marked dirty. - * This is the case if nfs_updatepage detects a conflicting request - * that has been written but not committed. - */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req; - struct page **pages; - struct nfs_write_data *data; - struct list_head *head = &desc->pg_list; - struct nfs_commit_info cinfo; - - data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; - } - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - /* Set up the argument struct */ - nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_write_common_ops; - return 0; -} - -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc, hdr); - return nfs_flush_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_flush); - -static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) -{ - struct nfs_write_header *whdr; - struct nfs_pgio_header *hdr; - int ret; - - whdr = nfs_writehdr_alloc(); - if (!whdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); - return -ENOMEM; - } - hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, nfs_writehdr_free); - atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); - if (ret == 0) - ret = nfs_do_multiple_writes(&hdr->rpc_list, - desc->pg_rpc_callops, - desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - return ret; -} - -static const struct nfs_pageio_ops nfs_pageio_write_ops = { - .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_writepages, -}; - void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, - NFS_SERVER(inode)->wsize, ioflags); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_write_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { - pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_write_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); -} - void nfs_commit_prepare(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; @@ -1260,23 +1313,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -/* - * Handle a write reply that flushes a whole page. - * - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. - */ -static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - nfs_writeback_done(task, data); -} - -static void nfs_writeback_release_common(void *calldata) +static void nfs_writeback_release_common(struct nfs_pgio_data *data) { - struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1285,34 +1323,46 @@ static void nfs_writeback_release_common(void *calldata) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); - else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) + memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_writedata_release(data); } -static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_writeback_done_common, - .rpc_release = nfs_writeback_release_common, -}; +/* + * Special version of should_remove_suid() that ignores capabilities. + */ +static int nfs_should_remove_suid(const struct inode *inode) +{ + umode_t mode = inode->i_mode; + int kill = 0; + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. + */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && S_ISREG(mode))) + return kill; + + return 0; +} /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct nfs_writeargs *argp = &data->args; - struct nfs_writeres *resp = &data->res; - struct inode *inode = data->header->inode; int status; - dprintk("NFS: %5u nfs_writeback_done (status %d)\n", - task->tk_pid, task->tk_status); - /* * ->write_done will attempt to use post-op attributes to detect * conflicting writes by other clients. A strict interpretation @@ -1322,11 +1372,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) - return; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + return status; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1342,18 +1392,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - resp->verf->committed, argp->stable); + data->res.verf->committed, data->args.stable); complain = jiffies + 300 * HZ; } } #endif - if (task->tk_status < 0) - nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - else if (resp->count < argp->count) { + + /* Deal with the suid/sgid bit corner case */ + if (nfs_should_remove_suid(inode)) + nfs_mark_for_revalidate(inode); + return 0; +} + +/* + * This function is called when the WRITE call is complete. + */ +static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +{ + struct nfs_pgio_args *argp = &data->args; + struct nfs_pgio_res *resp = &data->res; + + if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1404,7 +1467,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) static void nfs_commit_clear_lock(struct nfs_inode *nfsi) { clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); } @@ -1441,6 +1504,9 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -1555,9 +1621,9 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) nfs_list_remove_request(req); nfs_clear_page_commit(req->wb_page); - dprintk("NFS: commit (%s/%lld %d@%lld)", + dprintk("NFS: commit (%s/%llu %d@%lld)", req->wb_context->dentry->d_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { @@ -1715,8 +1781,14 @@ int nfs_wb_all(struct inode *inode) .range_start = 0, .range_end = LLONG_MAX, }; + int ret; + + trace_nfs_writeback_inode_enter(inode); + + ret = sync_inode(inode, &wbc); - return sync_inode(inode, &wbc); + trace_nfs_writeback_inode_exit(inode, ret); + return ret; } EXPORT_SYMBOL_GPL(nfs_wb_all); @@ -1725,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) struct nfs_page *req; int ret = 0; - for (;;) { - wait_on_page_writeback(page); - req = nfs_page_find_request(page); - if (req == NULL) - break; - if (nfs_lock_request(req)) { - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - /* - * In case nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); - nfs_unlock_and_release_request(req); - break; - } - ret = nfs_wait_on_request(req); - nfs_release_request(req); - if (ret < 0) - break; + wait_on_page_writeback(page); + + /* blocking call to cancel all requests and join to a single (head) + * request */ + req = nfs_lock_and_join_requests(page, false); + + if (IS_ERR(req)) { + ret = PTR_ERR(req); + } else if (req) { + /* all requests from this page have been cancelled by + * nfs_lock_and_join_requests, so just remove the head + * request from the inode / page_private pointer and + * release it */ + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_and_release_request(req); } + return ret; } @@ -1764,6 +1837,8 @@ int nfs_wb_page(struct inode *inode, struct page *page) }; int ret; + trace_nfs_writeback_page_enter(inode); + for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { @@ -1772,14 +1847,15 @@ int nfs_wb_page(struct inode *inode, struct page *page) goto out_error; continue; } + ret = 0; if (!PagePrivate(page)) break; ret = nfs_commit_inode(inode, FLUSH_SYNC); if (ret < 0) goto out_error; } - return 0; out_error: + trace_nfs_writeback_page_exit(inode, ret); return ret; } @@ -1808,7 +1884,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) @@ -1870,3 +1946,12 @@ void nfs_destroy_writepagecache(void) kmem_cache_destroy(nfs_wdata_cachep); } +static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_mode = FMODE_WRITE, + .rw_alloc_header = nfs_writehdr_alloc, + .rw_free_header = nfs_writehdr_free, + .rw_release = nfs_writeback_release_common, + .rw_done = nfs_writeback_done, + .rw_result = nfs_writeback_result, + .rw_initiate = nfs_initiate_write, +}; |
