aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-07-27 13:23:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-27 13:23:02 -0700
commit28890d3598c352ae065b560e0fded3e79c800ba1 (patch)
tree93267c5b29b9e81185e66a6c2e70e67dc626b63f
parent91d41fdf31f74e6e2e5f3cb018eca4200e36e202 (diff)
parented1e6211a0a134ff23592c6f057af982ad5dab52 (diff)
Merge branch 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
* 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (44 commits) NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation() nfs: don't use d_move in nfs_async_rename_done RDMA: Increasing RPCRDMA_MAX_DATA_SEGS SUNRPC: Replace xprt->resend and xprt->sending with a priority queue SUNRPC: Allow caller of rpc_sleep_on() to select priority levels SUNRPC: Support dynamic slot allocation for TCP connections SUNRPC: Clean up the slot table allocation SUNRPC: Initalise the struct xprt upon allocation SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot pnfs: simplify pnfs files module autoloading nfs: document nfsv4 sillyrename issues NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL SUNRPC: Convert the backchannel exports to EXPORT_SYMBOL_GPL SUNRPC: sunrpc should not explicitly depend on NFS config options NFS: Clean up - simplify the switch to read/write-through-MDS NFS: Move the pnfs write code into pnfs.c NFS: Move the pnfs read code into pnfs.c NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request NFS: Cache rpc_ops in struct nfs_pageio_descriptor ...
-rw-r--r--fs/lockd/clntproc.c9
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/callback_proc.c57
-rw-r--r--fs/nfs/client.c7
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/internal.h13
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4filelayout.c80
-rw-r--r--fs/nfs/nfs4filelayout.h17
-rw-r--r--fs/nfs/nfs4filelayoutdev.c452
-rw-r--r--fs/nfs/nfs4proc.c215
-rw-r--r--fs/nfs/nfs4state.c9
-rw-r--r--fs/nfs/nfs4xdr.c247
-rw-r--r--fs/nfs/objlayout/objio_osd.c20
-rw-r--r--fs/nfs/pagelist.c69
-rw-r--r--fs/nfs/pnfs.c221
-rw-r--r--fs/nfs/pnfs.h74
-rw-r--r--fs/nfs/pnfs_dev.c64
-rw-r--r--fs/nfs/read.c166
-rw-r--r--fs/nfs/unlink.c37
-rw-r--r--fs/nfs/write.c156
-rw-r--r--include/linux/nfs4.h3
-rw-r--r--include/linux/nfs_fs_sb.h5
-rw-r--r--include/linux/nfs_page.h17
-rw-r--r--include/linux/nfs_xdr.h34
-rw-r--r--include/linux/pnfs_osd_xdr.h31
-rw-r--r--include/linux/sunrpc/bc_xprt.h6
-rw-r--r--include/linux/sunrpc/sched.h4
-rw-r--r--include/linux/sunrpc/svc.h4
-rw-r--r--include/linux/sunrpc/xprt.h34
-rw-r--r--net/sunrpc/Kconfig4
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/backchannel_rqst.c7
-rw-r--r--net/sunrpc/bc_svc.c3
-rw-r--r--net/sunrpc/clnt.c15
-rw-r--r--net/sunrpc/sched.c38
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svcsock.c14
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprt.c257
-rw-r--r--net/sunrpc/xprtrdma/transport.c6
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
-rw-r--r--net/sunrpc/xprtsock.c57
44 files changed, 1856 insertions, 632 deletions
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e374050a911..8392cb85bd5 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
/* We appear to be out of the grace period */
wake_up_all(&host->h_gracewait);
}
- dprintk("lockd: server returns status %d\n", resp->status);
+ dprintk("lockd: server returns status %d\n",
+ ntohl(resp->status));
return 0; /* Okay, call complete */
}
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
goto out;
if (resp->status != nlm_lck_denied_nolocks)
- printk("lockd: unexpected unlock status: %d\n", resp->status);
+ printk("lockd: unexpected unlock status: %d\n",
+ ntohl(resp->status));
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
return -ENOLCK;
#endif
}
- printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+ printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+ ntohl(status));
return -ENOLCK;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 81515545ba7..2cde5d95475 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@ config NFS_V4
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+ select SUNRPC_BACKCHANNEL
select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb..74780f9f852 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
- if (nfs_compare_fh(&args->cbl_fh,
- &NFS_I(lo->plh_inode)->fh))
- continue;
- ino = igrab(lo->plh_inode);
- if (!ino)
- continue;
- found = true;
- /* Without this, layout can be freed as soon
- * as we release cl_lock.
- */
- get_layout_hdr(lo);
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ if (found)
+ break;
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
};
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if ((args->cbl_recall_type == RETURN_FSID) &&
- memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
- &args->cbl_fsid, sizeof(struct nfs_fsid)))
- continue;
- if (!igrab(lo->plh_inode))
+ memcmp(&server->fsid, &args->cbl_fsid,
+ sizeof(struct nfs_fsid)))
continue;
- get_layout_hdr(lo);
- BUG_ON(!list_empty(&lo->plh_bulk_recall));
- list_add(&lo->plh_bulk_recall, &recall_list);
+
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b88b65..19ea7d9c75e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
+ kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
+ INIT_LIST_HEAD(&server->layouts);
atomic_set(&server->active, 0);
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
dprintk("<-- %s %p\n", __func__, clp);
return clp;
}
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2aec37..321a66bc384 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
return err;
}
-static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
-
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
int nfs_async_inode_return_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
rcu_read_lock();
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_unlock();
return -ENOENT;
}
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347a2da..ab12913dd47 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
+struct nfs_pageio_descriptor;
/* read.c */
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063bacd28..8102391bb37 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
}
#ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
{
struct gss_api_mech *mech;
struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2eb1ba..1909ee8be35 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
+ NFS4CLNT_SERVER_SCOPE_MISMATCH,
};
enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
int cache_reply);
int (*validate_stateid)(struct nfs_delegation *,
const nfs4_stateid *);
+ int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fsinfo *);
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+ struct server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03abcd04..be93a622872 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
__func__, data->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s USE DS:ip %x %hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
struct nfs_fh *fh;
int status;
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
+ /* FIXME: remove this check when layout segment support is added */
+ if (lgr->range.offset != 0 ||
+ lgr->range.length != NFS4_MAX_UINT64) {
+ dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+ __func__);
+ goto out;
+ }
+
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ /* Found deviceid is being reaped */
+ if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+ goto out_put;
+
fl->dsaddr = dsaddr;
if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
* return true : coalesce page
* return false : don't coalesce page
*/
-bool
+static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;
- if (!pgio->pg_lseg)
- return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_READ,
+ GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_RW,
+ GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_init = filelayout_pg_init_read,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_init = filelayout_pg_init_write,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_writepages,
+};
+
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
- .pg_test = filelayout_pg_test,
+ .pg_read_ops = &filelayout_pg_read_ops,
+ .pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
pnfs_unregister_layoutdriver(&filelayout_type);
}
+MODULE_ALIAS("nfs-layouttype4-1");
+
module_init(nfs4filelayout_init);
module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e3795..2e42284253f 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@ enum stripetype4 {
};
/* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+ struct sockaddr_storage da_addr;
+ size_t da_addrlen;
+ struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ char *da_remotestr; /* human readable addr+port */
+};
+
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- u32 ds_ip_addr;
- u32 ds_port;
+ char *ds_remotestr; /* comma sep list of addrs */
+ struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+ return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf137726..ed388aae968 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__);
return;
}
- printk(" ip_addr %x port %hu\n"
+ printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
- struct nfs4_pnfs_ds *ds;
+ struct sockaddr_in *a, *b;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (addr1->sa_family != addr2->sa_family)
+ return false;
+
+ switch (addr1->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr1;
+ b = (struct sockaddr_in *)addr2;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return true;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr1;
+ b6 = (struct sockaddr_in6 *)addr2;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ return false;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return true;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr1->sa_family);
+ return false;
+ }
- dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
- ntohl(ip_addr), ntohs(port));
+ return false;
+}
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (ds->ds_ip_addr == ip_addr &&
- ds->ds_port == port) {
- return ds;
+/*
+ * Lookup DS by addresses. The first matching address returns true.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+ struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+
+ list_for_each_entry(da1, dsaddrs, da_node) {
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+ list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+ if (same_sockaddr(
+ (struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr))
+ return ds;
+ }
}
}
return NULL;
}
/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+ struct list_head *dsaddrs2)
+{
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+ size_t count1 = 0,
+ count2 = 0;
+
+ list_for_each_entry(da1, dsaddrs1, da_node)
+ count1++;
+
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ bool found = false;
+ count2++;
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return false;
+ }
+
+ return (count1 == count2);
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
- struct nfs_client *clp;
- struct sockaddr_in sin;
+ struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs4_pnfs_ds_addr *da;
int status = 0;
- dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ds->ds_ip_addr;
- sin.sin_port = ds->ds_port;
+ BUG_ON(list_empty(&ds->ds_addrs));
+
+ list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ dprintk("%s: DS %s: trying address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP);
+ if (!IS_ERR(clp))
+ break;
+ }
- clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
- sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
}
ds->ds_clp = clp;
- dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s [existing] server=%s\n", __func__,
+ ds->ds_remotestr);
goto out;
}
@@ -135,8 +220,7 @@ nfs4_ds_connect(struc