diff options
Diffstat (limited to 'net/sunrpc/xprtrdma/transport.c')
| -rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 262 |
1 files changed, 87 insertions, 175 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e..66f91f0d071 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -49,7 +49,9 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sunrpc/addr.h> #include "xprt_rdma.h" @@ -84,131 +86,97 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1; static struct ctl_table_header *sunrpc_table_header; -static ctl_table xr_tunables_table[] = { +static struct ctl_table xr_tunables_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_read", .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_write", .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_inline_write_padding", .data = &xprt_rdma_inline_write_padding, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &max_padding, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_memreg_strategy", .data = &xprt_rdma_memreg_strategy, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_memreg, .extra2 = &max_memreg, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_pad_optimize", .data = &xprt_rdma_pad_optimize, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = 0, + .proc_handler = proc_dointvec, }, + { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xr_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif +#define RPCRDMA_BIND_TO (60U * HZ) +#define RPCRDMA_INIT_REEST_TO (5U * HZ) +#define RPCRDMA_MAX_REEST_TO (30U * HZ) +#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) + static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) + struct sockaddr *sap = (struct sockaddr *) &rpcx_to_rdmad(xprt).addr; - char *buf; + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + char buf[64]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%u", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - buf = kzalloc(48, GFP_KERNEL); - if (buf) - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), "rdma"); - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; @@ -237,23 +205,18 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpc_xprt *xprt = &r_xprt->xprt; int rc = 0; - if (!xprt->shutdown) { - xprt_clear_connected(xprt); - - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - goto out; - } - goto out_clear; + current->flags |= PF_FSTRANS; + xprt_clear_connected(xprt); -out: - xprt_wake_pending_tasks(xprt, rc); + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + xprt_wake_pending_tasks(xprt, rc); -out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); + current->flags &= ~PF_FSTRANS; } /* @@ -271,27 +234,20 @@ static void xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int rc; dprintk("RPC: %s: called\n", __func__); - cancel_delayed_work(&r_xprt->rdma_connect); - flush_scheduled_work(); + cancel_delayed_work_sync(&r_xprt->rdma_connect); xprt_clear_connected(xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); - rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", - __func__, rc); + rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); - kfree(xprt->slot); - xprt->slot = NULL; - kfree(xprt); + xprt_free(xprt); dprintk("RPC: %s: returning\n", __func__); @@ -323,29 +279,20 @@ xprt_setup_rdma(struct xprt_create *args) return ERR_PTR(-EBADF); } - xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries, + xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", __func__); return ERR_PTR(-ENOMEM); } - xprt->max_reqs = xprt_rdma_slot_table_entries; - xprt->slot = kcalloc(xprt->max_reqs, - sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - dprintk("RPC: %s: couldn't allocate %d slots\n", - __func__, xprt->max_reqs); - kfree(xprt); - return ERR_PTR(-ENOMEM); - } - /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; - xprt->bind_timeout = (60U * HZ); - xprt->connect_timeout = (60U * HZ); - xprt->reestablish_timeout = (5U * HZ); - xprt->idle_timeout = (5U * 60 * HZ); + xprt->bind_timeout = RPCRDMA_BIND_TO; + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; + xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; xprt->resvport = 0; /* privileged port not needed */ xprt->tsh_size = 0; /* RPC-RDMA handles framing */ @@ -445,12 +392,11 @@ out4: xprt_rdma_free_addresses(xprt); rc = -EINVAL; out3: - (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); + rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: - kfree(xprt->slot); - kfree(xprt); + xprt_free(xprt); return ERR_PTR(rc); } @@ -482,45 +428,24 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) } static void -xprt_rdma_connect(struct rpc_task *task) +xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - if (!xprt_test_and_set_connecting(xprt)) { - if (r_xprt->rx_ep.rep_connected != 0) { - /* Reconnect */ - schedule_delayed_work(&r_xprt->rdma_connect, - xprt->reestablish_timeout); - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout > (30 * HZ)) - xprt->reestablish_timeout = (30 * HZ); - else if (xprt->reestablish_timeout < (5 * HZ)) - xprt->reestablish_timeout = (5 * HZ); - } else { - schedule_delayed_work(&r_xprt->rdma_connect, 0); - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); - } - } -} - -static int -xprt_rdma_reserve_xprt(struct rpc_task *task) -{ - struct rpc_xprt *xprt = task->tk_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int credits = atomic_read(&r_xprt->rx_buf.rb_credits); - - /* == RPC_CWNDSCALE @ init, but *after* setup */ - if (r_xprt->rx_buf.rb_cwndscale == 0UL) { - r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; - dprintk("RPC: %s: cwndscale %lu\n", __func__, - r_xprt->rx_buf.rb_cwndscale); - BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); + if (r_xprt->rx_ep.rep_connected != 0) { + /* Reconnect */ + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) + xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; + else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) + xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) + flush_delayed_work(&r_xprt->rdma_connect); } - xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(task); } /* @@ -534,11 +459,12 @@ xprt_rdma_reserve_xprt(struct rpc_task *task) static void * xprt_rdma_allocate(struct rpc_task *task, size_t size) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; struct rpcrdma_req *req, *nreq; req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); - BUG_ON(NULL == req); + if (req == NULL) + return NULL; if (size > req->rl_size) { dprintk("RPC: %s: size %zd too large for buffer[%zd]: " @@ -562,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) * If the allocation or registration fails, the RPC framework * will (doggedly) retry. */ - if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == - RPCRDMA_BOUNCEBUFFERS) { - /* forced to "pure inline" */ - dprintk("RPC: %s: too much data (%zd) for inline " - "(r/w max %d/%d)\n", __func__, size, - rpcx_to_rdmad(xprt).inline_rsize, - rpcx_to_rdmad(xprt).inline_wsize); - size = req->rl_size; - rpc_exit(task, -EIO); /* fail the operation */ - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; - goto out; - } if (task->tk_flags & RPC_TASK_SWAPPER) nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); else @@ -602,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) req = nreq; } dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); -out: req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_xdr_buf; @@ -638,9 +551,7 @@ xprt_rdma_free(void *buffer) __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); /* - * Finish the deregistration. When using mw bind, this was - * begun in rpcrdma_reply_handler(). In all other modes, we - * do it here, in thread context. The process is considered + * Finish the deregistration. The process is considered * complete when the rr_func vector becomes NULL - this * was put in place during rpcrdma_reply_handler() - the wait * call below will not block if the dereg is "done". If @@ -649,12 +560,7 @@ xprt_rdma_free(void *buffer) for (i = 0; req->rl_nchunks;) { --req->rl_nchunks; i += rpcrdma_deregister_external( - &req->rl_segments[i], r_xprt, NULL); - } - - if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { - rep->rr_func = NULL; /* abandon the callback */ - req->rl_reply = NULL; + &req->rl_segments[i], r_xprt); } if (req->rl_iov.length == 0) { /* see allocate above */ @@ -686,16 +592,15 @@ static int xprt_rdma_send_request(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + int rc; - /* marshal the send itself */ - if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { - r_xprt->rx_stats.failed_marshal_count++; - dprintk("RPC: %s: rpcrdma_marshal_req failed\n", - __func__); - return -EIO; + if (req->rl_niovs == 0) { + rc = rpcrdma_marshal_req(rqst); + if (rc < 0) + goto failed_marshal; } if (req->rl_reply == NULL) /* e.g. reconnection */ @@ -715,10 +620,16 @@ xprt_rdma_send_request(struct rpc_task *task) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; - task->tk_bytes_sent += rqst->rq_snd_buf.len; + rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; +failed_marshal: + r_xprt->rx_stats.failed_marshal_count++; + dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", + __func__, rc); + if (rc == -EIO) + return -EIO; drop_connection: xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ @@ -764,8 +675,9 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) */ static struct rpc_xprt_ops xprt_rdma_procs = { - .reserve_xprt = xprt_rdma_reserve_xprt, + .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .alloc_slot = xprt_alloc_slot, .release_request = xprt_release_rqst_cong, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ @@ -791,7 +703,7 @@ static void __exit xprt_rdma_cleanup(void) { int rc; - dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #ifdef RPC_DEBUG if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); @@ -813,14 +725,14 @@ static int __init xprt_rdma_init(void) if (rc) return rc; - dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); + dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); - dprintk(KERN_INFO "Defaults:\n"); - dprintk(KERN_INFO "\tSlots %d\n" + dprintk("Defaults:\n"); + dprintk("\tSlots %d\n" "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", xprt_rdma_slot_table_entries, xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); - dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", + dprintk("\tPadding %d\n\tMemreg %d\n", xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); #ifdef RPC_DEBUG |
