diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-10 23:52:35 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-10 23:52:35 +0900 |
commit | df632d3ce7eacf92ad9b642301c7b53a1d95b8d8 (patch) | |
tree | 848c39ed4f7cfdb582bf2e0a0a03147efaa5198d /fs/nfs | |
parent | 2474542f64432398f503373f53bdf620491bcfa8 (diff) | |
parent | af283885b70248268617955a5ea5476647bd556b (diff) |
Merge tag 'nfs-for-3.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Features include:
- Remove CONFIG_EXPERIMENTAL dependency from NFSv4.1
Aside from the issues discussed at the LKS, distros are shipping
NFSv4.1 with all the trimmings.
- Fix fdatasync()/fsync() for the corner case of a server reboot.
- NFSv4 OPEN access fix: finally distinguish correctly between
open-for-read and open-for-execute permissions in all situations.
- Ensure that the TCP socket is closed when we're in CLOSE_WAIT
- More idmapper bugfixes
- Lots of pNFS bugfixes and cleanups to remove unnecessary state and
make the code easier to read.
- In cases where a pNFS read or write fails, allow the client to
resume trying layoutgets after two minutes of read/write-
through-mds.
- More net namespace fixes to the NFSv4 callback code.
- More net namespace fixes to the NFSv3 locking code.
- More NFSv4 migration preparatory patches.
Including patches to detect network trunking in both NFSv4 and
NFSv4.1
- pNFS block updates to optimise LAYOUTGET calls."
* tag 'nfs-for-3.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (113 commits)
pnfsblock: cleanup nfs4_blkdev_get
NFS41: send real read size in layoutget
NFS41: send real write size in layoutget
NFS: track direct IO left bytes
NFSv4.1: Cleanup ugliness in pnfs_layoutgets_blocked()
NFSv4.1: Ensure that the layout sequence id stays 'close' to the current
NFSv4.1: Deal with seqid wraparound in the pNFS return-on-close code
NFSv4 set open access operation call flag in nfs4_init_opendata_res
NFSv4.1: Remove the dependency on CONFIG_EXPERIMENTAL
NFSv4 reduce attribute requests for open reclaim
NFSv4: nfs4_open_done first must check that GETATTR decoded a file type
NFSv4.1: Deal with wraparound when updating the layout "barrier" seqid
NFSv4.1: Deal with wraparound issues when updating the layout stateid
NFSv4.1: Always set the layout stateid if this is the first layoutget
NFSv4.1: Fix another refcount issue in pnfs_find_alloc_layout
NFSv4: don't put ACCESS in OPEN compound if O_EXCL
NFSv4: don't check MAY_WRITE access bit in OPEN
NFS: Set key construction data for the legacy upcall
NFSv4.1: don't do two EXCHANGE_IDs on mount
NFS: nfs41_walk_client_list(): re-lock before iterating
...
Diffstat (limited to 'fs/nfs')
35 files changed, 1848 insertions, 664 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index db7ad719628..13ca196385f 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -95,8 +95,8 @@ config NFS_SWAP This option enables swapon to work on files located on NFS mounts. config NFS_V4_1 - bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" - depends on NFS_V4 && EXPERIMENTAL + bool "NFS client support for NFSv4.1" + depends on NFS_V4 select SUNRPC_BACKCHANNEL help This option enables support for minor version 1 of the NFSv4 protocol diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index dd392ed5f2e..f1027b06a1a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -37,6 +37,7 @@ #include <linux/bio.h> /* struct bio */ #include <linux/buffer_head.h> /* various write calls */ #include <linux/prefetch.h> +#include <linux/pagevec.h> #include "../pnfs.h" #include "../internal.h" @@ -162,25 +163,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, return bio; } -static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, +static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, void (*end_io)(struct bio *, int err), - struct parallel_io *par) + struct parallel_io *par, + unsigned int offset, int len) { + isect = isect + (offset >> SECTOR_SHIFT); + dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, + npg, rw, (unsigned long long)isect, offset, len); retry: if (!bio) { bio = bl_alloc_init_bio(npg, isect, be, end_io, par); if (!bio) return ERR_PTR(-ENOMEM); } - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + if (bio_add_page(bio, page, len, offset) < len) { bio = bl_submit_bio(rw, bio); goto retry; } return bio; } +static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, + sector_t isect, struct page *page, + struct pnfs_block_extent *be, + void (*end_io)(struct bio *, int err), + struct parallel_io *par) +{ + return do_add_page_to_bio(bio, npg, rw, isect, page, be, + end_io, par, 0, PAGE_CACHE_SIZE); +} + /* This is basically copied from mpage_end_io_read */ static void bl_end_io_read(struct bio *bio, int err) { @@ -228,14 +243,6 @@ bl_end_par_io_read(void *data, int unused) schedule_work(&rdata->task.u.tk_work); } -static bool -bl_check_alignment(u64 offset, u32 len, unsigned long blkmask) -{ - if ((offset & blkmask) || (len & blkmask)) - return false; - return true; -} - static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -246,15 +253,15 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; + size_t bytes_left = rdata->args.count; + unsigned int pg_offset, pg_len; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); - if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK)) - goto use_mds; - par = alloc_parallel(rdata); if (!par) goto use_mds; @@ -284,36 +291,53 @@ bl_read_pagelist(struct nfs_read_data *rdata) extent_length = min(extent_length, cow_length); } } + + if (is_dio) { + pg_offset = f_offset & ~PAGE_CACHE_MASK; + if (pg_offset + bytes_left > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = bytes_left; + + f_offset += pg_len; + bytes_left -= pg_len; + isect += (pg_offset >> SECTOR_SHIFT); + } else { + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } + hole = is_hole(be, isect); if (hole && !cow_read) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ dprintk("%s Zeroing page for hole\n", __func__); - zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); + zero_user_segment(pages[i], pg_offset, pg_len); print_page(pages[i]); SetPageUptodate(pages[i]); } else { struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, rdata->pages.npages - i, READ, isect, pages[i], be_read, - bl_end_io_read, par); + bl_end_io_read, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } } - isect += PAGE_CACHE_SECTORS; + isect += (pg_len >> SECTOR_SHIFT); extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - f_offset; + rdata->res.count = header->inode->i_size - rdata->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; + rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; } out: bl_put_extent(be); @@ -461,6 +485,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) return; } +static void +bl_read_single_end_io(struct bio *bio, int error) +{ + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct page *page = bvec->bv_page; + + /* Only one page in bvec */ + unlock_page(page); +} + +static int +bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int offset, unsigned int len) +{ + struct bio *bio; + struct page *shadow_page; + sector_t isect; + char *kaddr, *kshadow_addr; + int ret = 0; + + dprintk("%s: offset %u len %u\n", __func__, offset, len); + + shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (shadow_page == NULL) + return -ENOMEM; + + bio = bio_alloc(GFP_NOIO, 1); + if (bio == NULL) + return -ENOMEM; + + isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + + (offset / SECTOR_SIZE); + + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = bl_read_single_end_io; + + lock_page(shadow_page); + if (bio_add_page(bio, shadow_page, + SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { + unlock_page(shadow_page); + bio_put(bio); + return -EIO; + } + + submit_bio(READ, bio); + wait_on_page_locked(shadow_page); + if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { + ret = -EIO; + } else { + kaddr = kmap_atomic(page); + kshadow_addr = kmap_atomic(shadow_page); + memcpy(kaddr + offset, kshadow_addr + offset, len); + kunmap_atomic(kshadow_addr); + kunmap_atomic(kaddr); + } + __free_page(shadow_page); + bio_put(bio); + + return ret; +} + +static int +bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int dirty_offset, unsigned int dirty_len, + bool full_page) +{ + int ret = 0; + unsigned int start, end; + + if (full_page) { + start = 0; + end = PAGE_CACHE_SIZE; + } else { + start = round_down(dirty_offset, SECTOR_SIZE); + end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); + } + + dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); + if (!be) { + zero_user_segments(page, start, dirty_offset, + dirty_offset + dirty_len, end); + if (start == 0 && end == PAGE_CACHE_SIZE && + trylock_page(page)) { + SetPageUptodate(page); + unlock_page(page); + } + return ret; + } + + if (start != dirty_offset) + ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); + + if (!ret && (dirty_offset + dirty_len < end)) + ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, + end - dirty_offset - dirty_len); + + return ret; +} + /* Given an unmapped page, zero it or read in page for COW, page is locked * by caller. */ @@ -494,7 +618,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) SetPageUptodate(page); cleanup: - bl_put_extent(cow_read); if (bh) free_buffer_head(bh); if (ret) { @@ -566,6 +689,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct parallel_io *par = NULL; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; + unsigned int pg_offset, pg_len, saved_len; struct page **pages = wdata->args.pages; struct page *page; pgoff_t index; @@ -574,10 +698,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); - /* Check for alignment first */ - if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK)) - goto out_mds; + if (header->dreq != NULL && + (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || + !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { + dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); + goto out_mds; + } /* At this point, wdata->pages is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. @@ -674,10 +801,11 @@ next_page: if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); + bl_put_extent(cow_read); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), - isect, NULL); + isect, &cow_read); if (!be || !is_writable(be, isect)) { header->pnfs_error = -EINVAL; goto out; @@ -694,7 +822,26 @@ next_page: extent_length = be->be_length - (isect - be->be_f_offset); } - if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + + dprintk("%s offset %lld count %Zu\n", __func__, offset, count); + pg_offset = offset & ~PAGE_CACHE_MASK; + if (pg_offset + count > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = count; + + saved_len = pg_len; + if (be->be_state == PNFS_BLOCK_INVALID_DATA && + !bl_is_sector_init(be->be_inval, isect)) { + ret = bl_read_partial_page_sync(pages[i], cow_read, + pg_offset, pg_len, true); + if (ret) { + dprintk("%s bl_read_partial_page_sync fail %d\n", + __func__, ret); + header->pnfs_error = ret; + goto out; + } + ret = bl_mark_sectors_init(be->be_inval, isect, PAGE_CACHE_SECTORS); if (unlikely(ret)) { @@ -703,15 +850,35 @@ next_page: header->pnfs_error = ret; goto out; } + + /* Expand to full page write */ + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } else if ((pg_offset & (SECTOR_SIZE - 1)) || + (pg_len & (SECTOR_SIZE - 1))){ + /* ahh, nasty case. We have to do sync full sector + * read-modify-write cycles. + */ + unsigned int saved_offset = pg_offset; + ret = bl_read_partial_page_sync(pages[i], be, pg_offset, + pg_len, false); + pg_offset = round_down(pg_offset, SECTOR_SIZE); + pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) + - pg_offset; } - bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + + + bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, isect, pages[i], be, - bl_end_io_write, par); + bl_end_io_write, par, + pg_offset, pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } + offset += saved_len; + count -= saved_len; isect += PAGE_CACHE_SECTORS; last_isect = isect; extent_length -= PAGE_CACHE_SECTORS; @@ -729,17 +896,16 @@ next_page: } write_done: - wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); - if (count < wdata->res.count) { - wdata->res.count = count; - } + wdata->res.count = wdata->args.count; out: bl_put_extent(be); + bl_put_extent(cow_read); bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; out_mds: bl_put_extent(be); + bl_put_extent(cow_read); kfree(par); return PNFS_NOT_ATTEMPTED; } @@ -874,7 +1040,7 @@ static void free_blk_mountid(struct block_mount_id *mid) } } -/* This is mostly copied from the filelayout's get_device_info function. +/* This is mostly copied from the filelayout_get_device_info function. * It seems much of this should be at the generic pnfs level. */ static struct pnfs_block_dev * @@ -1011,33 +1177,95 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static bool +is_aligned_req(struct nfs_page *req, unsigned int alignment) +{ + return IS_ALIGNED(req->wb_offset, alignment) && + IS_ALIGNED(req->wb_bytes, alignment); +} + static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) nfs_pageio_reset_read_mds(pgio); else pnfs_generic_pg_init_read(pgio, req); } +static bool +bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, SECTOR_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); +} + +/* + * Return the number of contiguous bytes for a given inode + * starting at page frame idx. + */ +static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t end; + + /* Optimize common case that writes from 0 to end of file */ + end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); + if (end != NFS_I(inode)->npages) { + rcu_read_lock(); + end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + rcu_read_unlock(); + } + + if (!end) + return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); + else + return (end - idx) << PAGE_CACHE_SHIFT; +} + static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) { nfs_pageio_reset_write_mds(pgio); - else - pnfs_generic_pg_init_write(pgio, req); + } else { + u64 wb_size; + if (pgio->pg_dreq == NULL) + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, + req->wb_index); + else + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pnfs_generic_pg_init_write(pgio, req, wb_size); + } +} + +static bool +bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_dreq != NULL && + !is_aligned_req(req, PAGE_CACHE_SIZE)) + return false; + + return pnfs_generic_pg_test(pgio, prev, req); } static const struct nfs_pageio_ops bl_pg_read_ops = { .pg_init = bl_pg_init_read, - .pg_test = pnfs_generic_pg_test, + .pg_test = bl_pg_test_read, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { .pg_init = bl_pg_init_write, - .pg_test = pnfs_generic_pg_test, + .pg_test = bl_pg_test_write, .pg_doio = pnfs_generic_pg_writepages, }; diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 03350690118..f4891bde885 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -41,6 +41,7 @@ #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) +#define SECTOR_SIZE (1 << SECTOR_SHIFT) struct block_mount_id { spinlock_t bm_lock; /* protects list */ @@ -172,7 +173,6 @@ struct bl_msg_hdr { /* blocklayoutdev.c */ ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); void bl_pipe_destroy_msg(struct rpc_pipe_msg *); -struct block_device *nfs4_blkdev_get(dev_t dev); int nfs4_blkdev_put(struct block_device *bdev); struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, struct pnfs_device *dev); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index c96554245cc..a86c5bdad9e 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -53,22 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) return 0; } -/* Open a block_device by device number. */ -struct block_device *nfs4_blkdev_get(dev_t dev) -{ - struct block_device *bd; - - dprintk("%s enter\n", __func__); - bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); - if (IS_ERR(bd)) - goto fail; - return bd; -fail: - dprintk("%s failed to open device : %ld\n", - __func__, PTR_ERR(bd)); - return NULL; -} - /* * Release the block device */ @@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server, goto out; } - bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); + bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), + FMODE_READ, NULL); if (IS_ERR(bd)) { - rc = PTR_ERR(bd); - dprintk("%s failed to open device : %d\n", __func__, rc); - rv = ERR_PTR(rc); + dprintk("%s failed to open device : %ld\n", __func__, + PTR_ERR(bd)); + rv = ERR_CAST(bd); goto out; } diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1f9a6032796..9c3e117c3ed 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); p = xdr_encode_hyper(p, 0LL); *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); - list_del(&lce->bse_node); - list_add_tail(&lce->bse_node, &bl->bl_committing); + list_move_tail(&lce->bse_node, &bl->bl_committing); bl->bl_count--; count++; } diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 4c8459e5bde..2245bef50f3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -12,6 +12,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> +#include <linux/errno.h> #include <linux/mutex.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -23,6 +24,7 @@ #include "nfs4_fs.h" #include "callback.h" #include "internal.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -37,7 +39,32 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; -unsigned short nfs_callback_tcpport6; +static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) +{ + int ret; + struct nfs_net *nn = net_generic(net, nfs_net_id); + + ret = svc_create_xprt(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret <= 0) + goto out_err; + nn->nfs_callback_tcpport = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nn->nfs_callback_tcpport, PF_INET, net); + + ret = svc_create_xprt(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); + if (ret > 0) { + nn->nfs_callback_tcpport6 = ret; + dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", + nn->nfs_callback_tcpport6, PF_INET6, net); + } else if (ret != -EAFNOSUPPORT) + goto out_err; + return 0; + +out_err: + return (ret) ? ret : -ENOMEM; +} /* * This is the NFSv4 callback kernel thread. @@ -78,38 +105,23 @@ nfs4_callback_svc(void *vrqstp) * Prepare to bring up the NFSv4 callback service */ static struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs4_callback_up(struct svc_serv *serv) { - int ret; - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret <= 0) - goto out_err; - nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport, PF_INET); - - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); - if (ret > 0) { - nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u)\n", - nfs_callback_tcpport6, PF_INET6); - } else if (ret == -EAFNOSUPPORT) - ret = 0; - else - goto out_err; - return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); - -out_err: - if (ret == 0) - ret = -ENOMEM; - return ERR_PTR(ret); } #if defined(CONFIG_NFS_V4_1) +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) +{ + /* + * Create an svc_sock for the back channel service that shares the + * fore channel connection. + * Returns the input port (0) and sets the svc_serv bc_xprt on success + */ + return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); +} + /* * The callback service for NFSv4.1 callbacks */ @@ -149,28 +161,9 @@ nfs41_callback_svc(void *vrqstp) * Bring up the NFSv4.1 callback service */ static struct svc_rqst * -nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +nfs41_callback_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - int ret; - - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); - if (ret < 0) { - rqstp = ERR_PTR(ret); - goto out; - } - - /* - * Save the svc_serv in the transport so that it can - * be referenced when the session backchannel is initialized - */ - xprt->bc_serv = serv; INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); @@ -180,90 +173,74 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } -out: dprintk("--> %s return %ld\n", __func__, IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); return rqstp; } -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { - if (minorversion) { - *rqstpp = nfs41_callback_up(serv, xprt); - *callback_svc = nfs41_callback_svc; - } - return minorversion; + *rqstpp = nfs41_callback_up(serv); + *callback_svc = nfs41_callback_svc; } static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { if (minorversion) - xprt->bc_serv = cb_info->serv; + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + xprt->bc_serv = serv; } #else -static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, - struct svc_serv *serv, struct rpc_xprt *xprt, - struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) { return 0; } +static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + *rqstpp = ERR_PTR(-ENOTSUPP); + *callback_svc = ERR_PTR(-ENOTSUPP); +} + static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, - struct nfs_callback_data *cb_info) + struct svc_serv *serv) { } #endif /* CONFIG_NFS_V4_1 */ -/* - * Bring up the callback thread if it is not already up. - */ -int nf |