about | summary | refs | log | tree | commit | diff
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/Kconfig 1
-rw-r--r-- fs/nfs/callback_proc.c 57
-rw-r--r-- fs/nfs/client.c 7
-rw-r--r-- fs/nfs/delegation.c 16
-rw-r--r-- fs/nfs/internal.h 13
-rw-r--r-- fs/nfs/namespace.c 2
-rw-r--r-- fs/nfs/nfs4_fs.h 5
-rw-r--r-- fs/nfs/nfs4filelayout.c 80
-rw-r--r-- fs/nfs/nfs4filelayout.h 17
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c 452
-rw-r--r-- fs/nfs/nfs4proc.c 215
-rw-r--r-- fs/nfs/nfs4state.c 9
-rw-r--r-- fs/nfs/nfs4xdr.c 247
-rw-r--r-- fs/nfs/objlayout/objio_osd.c 20
-rw-r--r-- fs/nfs/pagelist.c 69
-rw-r--r-- fs/nfs/pnfs.c 221
-rw-r--r-- fs/nfs/pnfs.h 74
-rw-r--r-- fs/nfs/pnfs_dev.c 64
-rw-r--r-- fs/nfs/read.c 166
-rw-r--r-- fs/nfs/unlink.c 37
-rw-r--r-- fs/nfs/write.c 156
21 files changed, 1494 insertions, 434 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 81515545ba7..2cde5d95475 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@ config NFS_V4
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+ select SUNRPC_BACKCHANNEL
select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb..74780f9f852 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
- if (nfs_compare_fh(&args->cbl_fh,
- &NFS_I(lo->plh_inode)->fh))
- continue;
- ino = igrab(lo->plh_inode);
- if (!ino)
- continue;
- found = true;
- /* Without this, layout can be freed as soon
- * as we release cl_lock.
- */
- get_layout_hdr(lo);
- break;
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->plh_inode)->fh))
+ continue;
+ ino = igrab(lo->plh_inode);
+ if (!ino)
+ continue;
+ found = true;
+ /* Without this, layout can be freed as soon
+ * as we release cl_lock.
+ */
+ get_layout_hdr(lo);
+ break;
+ }
+ if (found)
+ break;
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
if (!found)
return NFS4ERR_NOMATCHING_LAYOUT;
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
static u32 initiate_bulk_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
+ struct nfs_server *server;
struct pnfs_layout_hdr *lo;
struct inode *ino;
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
};
spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if ((args->cbl_recall_type == RETURN_FSID) &&
- memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
- &args->cbl_fsid, sizeof(struct nfs_fsid)))
- continue;
- if (!igrab(lo->plh_inode))
+ memcmp(&server->fsid, &args->cbl_fsid,
+ sizeof(struct nfs_fsid)))
continue;
- get_layout_hdr(lo);
- BUG_ON(!list_empty(&lo->plh_bulk_recall));
- list_add(&lo->plh_bulk_recall, &recall_list);
+
+ list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!igrab(lo->plh_inode))
+ continue;
+ get_layout_hdr(lo);
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
}
+ rcu_read_unlock();
spin_unlock(&clp->cl_lock);
+
list_for_each_entry_safe(lo, tmp,
&recall_list, plh_bulk_recall) {
ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b88b65..19ea7d9c75e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
cred = rpc_lookup_machine_cred();
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
nfs_fscache_get_client_cookie(clp);
return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
nfs4_deviceid_purge_client(clp);
kfree(clp->cl_hostname);
+ kfree(clp->server_scope);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
INIT_LIST_HEAD(&server->delegations);
+ INIT_LIST_HEAD(&server->layouts);
atomic_set(&server->active, 0);
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
dprintk("<-- %s %p\n", __func__, clp);
return clp;
}
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2aec37..321a66bc384 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
return err;
}
-static void nfs_mark_return_delegation(struct nfs_delegation *delegation)
+static void nfs_mark_return_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
-
set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
- set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
+ set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
}
/**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
continue;
if (delegation->type & flags)
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
continue;
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
}
}
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
int nfs_async_inode_return_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
- struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
rcu_read_lock();
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_unlock();
return -ENOENT;
}
- nfs_mark_return_delegation(delegation);
+ nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347a2da..ab12913dd47 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen);
extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
+struct nfs_pageio_descriptor;
/* read.c */
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063bacd28..8102391bb37 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
}
#ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
{
struct gss_api_mech *mech;
struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2eb1ba..1909ee8be35 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_RECALL_SLOT,
NFS4CLNT_LEASE_CONFIRM,
+ NFS4CLNT_SERVER_SCOPE_MISMATCH,
};
enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
int cache_reply);
int (*validate_stateid)(struct nfs_delegation *,
const nfs4_stateid *);
+ int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fsinfo *);
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+ struct server_scope **);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03abcd04..be93a622872 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
__func__, data->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s USE DS:ip %x %hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
struct nfs_fh *fh;
int status;
+ if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+ return PNFS_NOT_ATTEMPTED;
+
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
- dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
+ /* FIXME: remove this check when layout segment support is added */
+ if (lgr->range.offset != 0 ||
+ lgr->range.length != NFS4_MAX_UINT64) {
+ dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+ __func__);
+ goto out;
+ }
+
if (fl->pattern_offset > lgr->range.offset) {
dprintk("%s pattern_offset %lld too large\n",
__func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ /* Found deviceid is being reaped */
+ if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+ goto out_put;
+
fl->dsaddr = dsaddr;
if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
* return true : coalesce page
* return false : don't coalesce page
*/
-bool
+static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;
- if (!pgio->pg_lseg)
- return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_READ,
+ GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ 0,
+ NFS4_MAX_UINT64,
+ IOMODE_RW,
+ GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_init = filelayout_pg_init_read,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_init = filelayout_pg_init_write,
+ .pg_test = filelayout_pg_test,
+ .pg_doio = pnfs_generic_pg_writepages,
+};
+
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
- .pg_test = filelayout_pg_test,
+ .pg_read_ops = &filelayout_pg_read_ops,
+ .pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
pnfs_unregister_layoutdriver(&filelayout_type);
}
+MODULE_ALIAS("nfs-layouttype4-1");
+
module_init(nfs4filelayout_init);
module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e3795..2e42284253f 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@ enum stripetype4 {
};
/* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+ struct sockaddr_storage da_addr;
+ size_t da_addrlen;
+ struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
+ char *da_remotestr; /* human readable addr+port */
+};
+
struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
- u32 ds_ip_addr;
- u32 ds_port;
+ char *ds_remotestr; /* comma sep list of addrs */
+ struct list_head ds_addrs;
struct nfs_client *ds_clp;
atomic_t ds_count;
};
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+ return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf137726..ed388aae968 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__);
return;
}
- printk(" ip_addr %x port %hu\n"
+ printk(" ds %s\n"
" ref count %d\n"
" client %p\n"
" cl_exchange_flags %x\n",
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
}
-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
{
- struct nfs4_pnfs_ds *ds;
+ struct sockaddr_in *a, *b;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (addr1->sa_family != addr2->sa_family)
+ return false;
+
+ switch (addr1->sa_family) {
+ case AF_INET:
+ a = (struct sockaddr_in *)addr1;
+ b = (struct sockaddr_in *)addr2;
+
+ if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+ a->sin_port == b->sin_port)
+ return true;
+ break;
+
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)addr1;
+ b6 = (struct sockaddr_in6 *)addr2;
+
+ /* LINKLOCAL addresses must have matching scope_id */
+ if (ipv6_addr_scope(&a6->sin6_addr) ==
+ IPV6_ADDR_SCOPE_LINKLOCAL &&
+ a6->sin6_scope_id != b6->sin6_scope_id)
+ return false;
+
+ if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+ a6->sin6_port == b6->sin6_port)
+ return true;
+ break;
+
+ default:
+ dprintk("%s: unhandled address family: %u\n",
+ __func__, addr1->sa_family);
+ return false;
+ }
- dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
- ntohl(ip_addr), ntohs(port));
+ return false;
+}
- list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
- if (ds->ds_ip_addr == ip_addr &&
- ds->ds_port == port) {
- return ds;
+/*
+ * Lookup DS by addresses. Returns the first cached DS sharing an address, or NULL.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+ struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+
+ list_for_each_entry(da1, dsaddrs, da_node) {
+ list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+ list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+ if (same_sockaddr(
+ (struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr))
+ return ds;
+ }
}
}
return NULL;
}
/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+ struct list_head *dsaddrs2)
+{
+ struct nfs4_pnfs_ds_addr *da1, *da2;
+ size_t count1 = 0,
+ count2 = 0;
+
+ list_for_each_entry(da1, dsaddrs1, da_node)
+ count1++;
+
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ bool found = false;
+ count2++;
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+ (struct sockaddr *)&da2->da_addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return false;
+ }
+
+ return (count1 == count2);
+}
+
+/*
* Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
- struct nfs_client *clp;
- struct sockaddr_in sin;
+ struct nfs_client *clp = ERR_PTR(-EIO);
+ struct nfs4_pnfs_ds_addr *da;
int status = 0;
- dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+ dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = ds->ds_ip_addr;
- sin.sin_port = ds->ds_port;
+ BUG_ON(list_empty(&ds->ds_addrs));
+
+ list_for_each_entry(da, &ds->ds_addrs, da_node) {
+ dprintk("%s: DS %s: trying address %s\n",
+ __func__, ds->ds_remotestr, da->da_remotestr);
+
+ clp = nfs4_set_ds_client(mds_srv->nfs_client,
+ (struct sockaddr *)&da->da_addr,
+ da->da_addrlen, IPPROTO_TCP);
+ if (!IS_ERR(clp))
+ break;
+ }
- clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
- sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
}
ds->ds_clp = clp;
- dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
- ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+ dprintk("%s [existing] server=%s\n", __func__,
+ ds->ds_remotestr);
goto out;
}
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put;
ds->ds_clp = clp;
- dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
- ntohs(ds->ds_port));
+ dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
out_put:
@@ -147,12 +231,25 @@ out_put:
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
+ struct nfs4_pnfs_ds_addr *da;
+
dprintk("--> %s\n", __func__);
ifdebug(FACILITY)
print_ds(ds);
if (ds->ds_clp)
nfs_put_client(ds->ds_clp);
+
+ while (!list_empty(&ds->ds_addrs)) {
+ da = list_first_entry(&ds->ds_addrs,
+ struct nfs4_pnfs_ds_addr,
+ da_node);
+ list_del_init(&da->da_node);
+ kfree(da->da_remotestr);
+ kfree(da);
+ }
+
+ kfree(ds->ds_remotestr);
kfree(ds);
}
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}
+/*
+ * Create a string with a human readable address and port to avoid
+ * complicated setup around many dprintks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+ struct nfs4_pnfs_ds_addr *da;
+ char *remotestr;
+ size_t len;
+ char *p;
+
+ len = 3; /* '{', '}' and NUL terminator */
+ list_for_each_entry(da, dsaddrs, da_node) {
+ len += strlen(da->da_remotestr) + 1; /* string plus comma */
+ }
+
+ remotestr = kzalloc(len, gfp_flags);
+ if (!remotestr)
+ return NULL;
+
+ p = remotestr;
+ *(p++) = '{';
+ len--;
+ list_for_each_entry(da, dsaddrs, da_node) {
+ size_t ll = strlen(da->da_remotestr);
+
+ if (ll > len)
+ goto out_err;
+
+ memcpy(p, da->da_remotestr, ll);
+ p += ll;
+ len -= ll;
+
+ if (len < 1)
+ goto out_err;
+ (*p++) = ',';
+ len--;
+ }
+ if (len < 2)
+ goto out_err;
+ *(p++) = '}';
+ *p = '\0';
+ return remotestr;
+out_err:
+ kfree(remotestr);
+ return NULL;
+}
+
static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *tmp_ds, *ds;
+ struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+ char *remotestr;
- ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+ if (list_empty(dsaddrs)) {
+ dprintk("%s: no addresses defined\n", __func__);
+ goto out;
+ }
+
+ ds = kzalloc(sizeof(*ds), gfp_flags);
if (!ds)
goto out;
+ /* this is only used for debugging, so it's ok if it's NULL */
+ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
spin_lock(&nfs4_ds_cache_lock);
- tmp_ds = _data_server_lookup_locked(ip_addr, port);
+ tmp_ds = _data_server_lookup_locked(dsaddrs);
if (tmp_ds == NULL) {
- ds->ds_ip_addr = ip_addr;
- ds->ds_port = port;
+ INIT_LIST_HEAD(&ds->ds_addrs);
+ list_splice_init(dsaddrs, &ds->ds_addrs);
+ ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache);
- dprintk("%s add new data server ip 0x%x\n", __func__,
- ds->ds_ip_addr);
+ dprintk("%s add new data server %s\n", __func__,
+ ds->ds_remotestr);
} else {
+ if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+ dsaddrs)) {
+ dprintk("%s: multipath address mismatch: %s != %s",
+ __func__, tmp_ds->ds_remotestr, remotestr);
+ }
+ kfree(remotestr);
kfree(ds);
atomic_inc(&tmp_ds->ds_count);
- dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
- __func__, tmp_ds->ds_ip_addr,
+ dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+ __func__, tmp_ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count));
ds = tmp_ds;
}
@@ -213,18 +375,22 @@ out:
}
/*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
*/
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{
- struct nfs4_pnfs_ds *ds = NULL;
- char *buf;
- const char *ipend, *pstr;
- u32 ip_addr, port;
- int nlen, rlen, i;
+ struct nfs4_pnfs_ds_addr *da = NULL;
+ char *buf, *portstr;
+ u32 port;
+ int nlen, rlen;
int tmp[2];
__be32 *p;
+ char *netid, *match_netid;
+ size_t len, match_netid_len;
+ char *startsep = "";
+ char *endsep = "";
+
/* r_netid */
p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
if (unlikely(!p))
goto out_err;
- /* Check that netid is "tcp" */
- if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
- dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+ netid = kmalloc(nlen+1, gfp_flags);
+ if (unlikely(!netid))
goto out_err;
- }
- /* r_addr */
+ netid[nlen] = '\0';
+ memcpy(netid, p, nlen);
+
+ /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p))
- goto out_err;
+ goto out_free_netid;
- /* ipv6 length plus port is legal */
- if (rlen > INET6_ADDRSTRLEN + 8) {
+ /* port is ".ABC.DEF", 8 chars max */
+ if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__,
rlen);
- goto out_err;
+ goto out_free_netid;
}
buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) {
dprintk("%s: Not enough memory\n", __func__);
- goto out_err;
+ goto out_free_netid;
}
buf[rlen] = '\0';
memcpy(buf, p, rlen);
- /* replace the port dots with dashes for the in4_pton() delimiter*/
- for (i = 0; i < 2; i++) {
- char *res = strrchr(buf, '.');
- if (!res) {
- dprintk("%s: Failed finding expected dots in port\n",
- __func__);
- goto out_free;
- }
- *res = '-';
+ /* replace port '.' with '-' */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot in port\n",
+ __func__);
+ goto out_free_buf;
+ }
+ *portstr = '-';
+
+ /* find '.' between address and port */
+ portstr = strrchr(buf, '.');
+ if (!portstr) {
+ dprintk("%s: Failed finding expected dot between address and "
+ "port\n", __func__);
+ goto out_free_buf;
}
+ *portstr = '\0';
- /* Currently only support ipv4 address */
- if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
- dprintk("%s: Only ipv4 addresses supported\n", __func__);
- goto out_free;
+ da = kzalloc(sizeof(*da), gfp_flags);
+ if (unlikely(!da))
+ goto out_free_buf;
+
+ INIT_LIST_HEAD(&da->da_node);
+
+ if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+ sizeof(d