aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtrdma/transport.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/xprtrdma/transport.c')
-rw-r--r--net/sunrpc/xprtrdma/transport.c262
1 files changed, 87 insertions, 175 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1dd6123070e..66f91f0d071 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -49,7 +49,9 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/sunrpc/addr.h>
#include "xprt_rdma.h"
@@ -84,131 +86,97 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1;
static struct ctl_table_header *sunrpc_table_header;
-static ctl_table xr_tunables_table[] = {
+static struct ctl_table xr_tunables_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_slot_table_entries",
.data = &xprt_rdma_slot_table_entries,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_slot_table_size,
.extra2 = &max_slot_table_size
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_max_inline_read",
.data = &xprt_rdma_max_inline_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_max_inline_write",
.data = &xprt_rdma_max_inline_write,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_inline_write_padding",
.data = &xprt_rdma_inline_write_padding,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &max_padding,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_memreg_strategy",
.data = &xprt_rdma_memreg_strategy,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_memreg,
.extra2 = &max_memreg,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = 0,
+ .proc_handler = proc_dointvec,
},
+ { },
};
-static ctl_table sunrpc_table[] = {
+static struct ctl_table sunrpc_table[] = {
{
- .ctl_name = CTL_SUNRPC,
.procname = "sunrpc",
.mode = 0555,
.child = xr_tunables_table
},
- {
- .ctl_name = 0,
- },
+ { },
};
#endif
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
xprt_rdma_format_addresses(struct rpc_xprt *xprt)
{
- struct sockaddr_in *addr = (struct sockaddr_in *)
+ struct sockaddr *sap = (struct sockaddr *)
&rpcx_to_rdmad(xprt).addr;
- char *buf;
+ struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+ char buf[64];
- buf = kzalloc(20, GFP_KERNEL);
- if (buf)
- snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr);
- xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+ (void)rpc_ntop(sap, buf, sizeof(buf));
+ xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
- buf = kzalloc(8, GFP_KERNEL);
- if (buf)
- snprintf(buf, 8, "%u", ntohs(addr->sin_port));
- xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+ snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
+ xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
- buf = kzalloc(48, GFP_KERNEL);
- if (buf)
- snprintf(buf, 48, "addr=%pI4 port=%u proto=%s",
- &addr->sin_addr.s_addr,
- ntohs(addr->sin_port), "rdma");
- xprt->address_strings[RPC_DISPLAY_ALL] = buf;
-
- buf = kzalloc(10, GFP_KERNEL);
- if (buf)
- snprintf(buf, 10, "%02x%02x%02x%02x",
- NIPQUAD(addr->sin_addr.s_addr));
- xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
-
- buf = kzalloc(8, GFP_KERNEL);
- if (buf)
- snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));
- xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
-
- buf = kzalloc(30, GFP_KERNEL);
- if (buf)
- snprintf(buf, 30, "%pI4.%u.%u",
- &addr->sin_addr.s_addr,
- ntohs(addr->sin_port) >> 8,
- ntohs(addr->sin_port) & 0xff);
- xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+ snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+ snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
/* netid */
xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
@@ -237,23 +205,18 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
- if (!xprt->shutdown) {
- xprt_clear_connected(xprt);
-
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
- rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- goto out;
- }
- goto out_clear;
+ current->flags |= PF_FSTRANS;
+ xprt_clear_connected(xprt);
-out:
- xprt_wake_pending_tasks(xprt, rc);
+ dprintk("RPC: %s: %sconnect\n", __func__,
+ r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+ rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ xprt_wake_pending_tasks(xprt, rc);
-out_clear:
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
+ current->flags &= ~PF_FSTRANS;
}
/*
@@ -271,27 +234,20 @@ static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
dprintk("RPC: %s: called\n", __func__);
- cancel_delayed_work(&r_xprt->rdma_connect);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&r_xprt->rdma_connect);
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
- rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
- __func__, rc);
+ rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
- kfree(xprt->slot);
- xprt->slot = NULL;
- kfree(xprt);
+ xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
@@ -323,29 +279,20 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(-EBADF);
}
- xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+ xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+ xprt_rdma_slot_table_entries,
+ xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
return ERR_PTR(-ENOMEM);
}
- xprt->max_reqs = xprt_rdma_slot_table_entries;
- xprt->slot = kcalloc(xprt->max_reqs,
- sizeof(struct rpc_rqst), GFP_KERNEL);
- if (xprt->slot == NULL) {
- dprintk("RPC: %s: couldn't allocate %d slots\n",
- __func__, xprt->max_reqs);
- kfree(xprt);
- return ERR_PTR(-ENOMEM);
- }
-
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
- xprt->bind_timeout = (60U * HZ);
- xprt->connect_timeout = (60U * HZ);
- xprt->reestablish_timeout = (5U * HZ);
- xprt->idle_timeout = (5U * 60 * HZ);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
@@ -445,12 +392,11 @@ out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
- (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+ rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
return ERR_PTR(rc);
}
@@ -482,45 +428,24 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
}
static void
-xprt_rdma_connect(struct rpc_task *task)
+xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- if (!xprt_test_and_set_connecting(xprt)) {
- if (r_xprt->rx_ep.rep_connected != 0) {
- /* Reconnect */
- schedule_delayed_work(&r_xprt->rdma_connect,
- xprt->reestablish_timeout);
- xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > (30 * HZ))
- xprt->reestablish_timeout = (30 * HZ);
- else if (xprt->reestablish_timeout < (5 * HZ))
- xprt->reestablish_timeout = (5 * HZ);
- } else {
- schedule_delayed_work(&r_xprt->rdma_connect, 0);
- if (!RPC_IS_ASYNC(task))
- flush_scheduled_work();
- }
- }
-}
-
-static int
-xprt_rdma_reserve_xprt(struct rpc_task *task)
-{
- struct rpc_xprt *xprt = task->tk_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
- /* == RPC_CWNDSCALE @ init, but *after* setup */
- if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
- r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
- dprintk("RPC: %s: cwndscale %lu\n", __func__,
- r_xprt->rx_buf.rb_cwndscale);
- BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
+ if (r_xprt->rx_ep.rep_connected != 0) {
+ /* Reconnect */
+ schedule_delayed_work(&r_xprt->rdma_connect,
+ xprt->reestablish_timeout);
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
+ else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ } else {
+ schedule_delayed_work(&r_xprt->rdma_connect, 0);
+ if (!RPC_IS_ASYNC(task))
+ flush_delayed_work(&r_xprt->rdma_connect);
}
- xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
- return xprt_reserve_xprt_cong(task);
}
/*
@@ -534,11 +459,12 @@ xprt_rdma_reserve_xprt(struct rpc_task *task)
static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size)
{
- struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
- BUG_ON(NULL == req);
+ if (req == NULL)
+ return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
@@ -562,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
- if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
- RPCRDMA_BOUNCEBUFFERS) {
- /* forced to "pure inline" */
- dprintk("RPC: %s: too much data (%zd) for inline "
- "(r/w max %d/%d)\n", __func__, size,
- rpcx_to_rdmad(xprt).inline_rsize,
- rpcx_to_rdmad(xprt).inline_wsize);
- size = req->rl_size;
- rpc_exit(task, -EIO); /* fail the operation */
- rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
- goto out;
- }
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
@@ -602,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
-out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
@@ -638,9 +551,7 @@ xprt_rdma_free(void *buffer)
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
- * Finish the deregistration. When using mw bind, this was
- * begun in rpcrdma_reply_handler(). In all other modes, we
- * do it here, in thread context. The process is considered
+ * Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
@@ -649,12 +560,7 @@ xprt_rdma_free(void *buffer)
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- }
-
- if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
- rep->rr_func = NULL; /* abandon the callback */
- req->rl_reply = NULL;
+ &req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
@@ -686,16 +592,15 @@ static int
xprt_rdma_send_request(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
- struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
- /* marshal the send itself */
- if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
- r_xprt->rx_stats.failed_marshal_count++;
- dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
- __func__);
- return -EIO;
+ if (req->rl_niovs == 0) {
+ rc = rpcrdma_marshal_req(rqst);
+ if (rc < 0)
+ goto failed_marshal;
}
if (req->rl_reply == NULL) /* e.g. reconnection */
@@ -715,10 +620,16 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
- task->tk_bytes_sent += rqst->rq_snd_buf.len;
+ rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
+failed_marshal:
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
+ __func__, rc);
+ if (rc == -EIO)
+ return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
@@ -764,8 +675,9 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
- .reserve_xprt = xprt_rdma_reserve_xprt,
+ .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
+ .alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
@@ -791,7 +703,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int rc;
- dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+ dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
@@ -813,14 +725,14 @@ static int __init xprt_rdma_init(void)
if (rc)
return rc;
- dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+ dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
- dprintk(KERN_INFO "Defaults:\n");
- dprintk(KERN_INFO "\tSlots %d\n"
+ dprintk("Defaults:\n");
+ dprintk("\tSlots %d\n"
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
- dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+ dprintk("\tPadding %d\n\tMemreg %d\n",
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#ifdef RPC_DEBUG