Diffstat (limited to 'net/9p/trans_rdma.c')
 -rw-r--r--  net/9p/trans_rdma.c | 198
 1 file changed, 125 insertions(+), 73 deletions(-)
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 65cb29db03f..14ad43b5cf8 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
@@ -40,6 +42,7 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <linux/semaphore.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
@@ -54,11 +57,8 @@
 #define P9_RDMA_IRD		0
 #define P9_RDMA_ORD		0
 #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
-#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can
-						 * safely advertise a maxsize
-						 * of 64k */
+#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
 
 /**
  * struct p9_trans_rdma - RDMA transport instance
  *
@@ -73,7 +73,9 @@
  * @sq_depth: The depth of the Send Queue
  * @sq_sem: Semaphore for the SQ
  * @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ *		See rdma_request()
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
@@ -98,7 +100,8 @@ struct p9_trans_rdma {
 	int sq_depth;
 	struct semaphore sq_sem;
 	int rq_depth;
-	atomic_t rq_count;
+	struct semaphore rq_sem;
+	atomic_t excess_rc;
 	struct sockaddr_in addr;
 	spinlock_t req_lock;
 
@@ -166,8 +169,7 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
-	char *options;
-	int ret;
+	char *options, *tmp_options;
 
 	opts->port = P9_PORT;
 	opts->sq_depth = P9_RDMA_SQ_DEPTH;
@@ -177,12 +179,13 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
 	if (!params)
 		return 0;
 
-	options = kstrdup(params, GFP_KERNEL);
-	if (!options) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
-			   "failed to allocate copy of option string\n");
+	tmp_options = kstrdup(params, GFP_KERNEL);
+	if (!tmp_options) {
+		p9_debug(P9_DEBUG_ERROR,
+			 "failed to allocate copy of option string\n");
 		return -ENOMEM;
 	}
+	options = tmp_options;
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
@@ -190,11 +193,12 @@
 		if (!*p)
 			continue;
 		token = match_token(p, tokens, args);
+		if (token == Opt_err)
+			continue;
 		r = match_int(&args[0], &option);
 		if (r < 0) {
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				   "integer field, but no integer?\n");
-			ret = r;
+			p9_debug(P9_DEBUG_ERROR,
+				 "integer field, but no integer?\n");
 			continue;
 		}
 		switch (token) {
@@ -216,7 +220,7 @@
 	}
 	/* RQ must be at least as large as the SQ */
 	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
 
-	kfree(options);
+	kfree(tmp_options);
 	return 0;
 }
@@ -295,18 +299,22 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 	if (!req)
 		goto err_out;
 
+	/* Check that we have not yet received a reply for this request.
+	 */
+	if (unlikely(req->rc)) {
+		pr_err("Duplicate reply for request %d", tag);
+		goto err_out;
+	}
+
 	req->rc = c->rc;
-	req->status = REQ_STATUS_RCVD;
-	p9_client_cb(client, req);
+	p9_client_cb(client, req, REQ_STATUS_RCVD);
 
 	return;
 
  err_out:
-	P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n",
-		   req, err, status);
+	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);
 	rdma->state = P9_RDMA_FLUSHING;
 	client->status = Disconnected;
-	return;
 }
 
 static void
@@ -320,8 +328,8 @@
 
 static void qp_event_handler(struct ib_event *event, void *context)
 {
-	P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event,
-		   context);
+	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
+		 event->event, context);
 }
 
 static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
@@ -337,8 +345,8 @@
 
 		switch (c->wc_op) {
 		case IB_WC_RECV:
-			atomic_dec(&rdma->rq_count);
 			handle_recv(client, rdma, c, wc.status, wc.byte_len);
+			up(&rdma->rq_sem);
 			break;
 
 		case IB_WC_SEND:
@@ -347,8 +355,7 @@
 			break;
 
 		default:
-			printk(KERN_ERR "9prdma: unexpected completion type, "
-			       "c->wc_op=%d, wc.opcode=%d, status=%d\n",
+			pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
 			       c->wc_op, wc.opcode, wc.status);
 			break;
 		}
@@ -358,7 +365,7 @@
 
 static void cq_event_handler(struct ib_event *e, void *v)
 {
-	P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
+	p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
 }
 
 static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
@@ -409,7 +416,7 @@
 	return ib_post_recv(rdma->qp, &wr, &bad_wr);
 
  error:
-	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+	p9_debug(P9_DEBUG_ERROR, "EIO\n");
 	return -EIO;
 }
@@ -423,30 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *c = NULL;
 	struct p9_rdma_context *rpl_context = NULL;
 
-	/* Allocate an fcall for the reply */
-	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
-	if (!rpl_context)
-		goto err_close;
-
-	/*
-	 * If the request has a buffer, steal it, otherwise
-	 * allocate a new one.  Typically, requests should already
-	 * have receive buffers allocated and just swap them around
-	 */
-	if (!req->rc) {
-		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-				  GFP_KERNEL);
-		if (req->rc) {
-			req->rc->sdata = (char *) req->rc +
-						sizeof(struct p9_fcall);
-			req->rc->capacity = client->msize;
+	/* When an error occurs between posting the recv and the send,
+	 * there will be a receive context posted without a pending request.
+	 * Since there is no way to "un-post" it, we remember it and skip
+	 * post_recv() for the next request.
+	 * So here,
+	 * see if we are this `next request' and need to absorb an excess rc.
+	 * If yes, then drop and free our own, and do not recv_post().
+	 **/
+	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+			/* Got one ! */
+			kfree(req->rc);
+			req->rc = NULL;
+			goto dont_need_post_recv;
+		} else {
+			/* We raced and lost. */
+			atomic_inc(&rdma->excess_rc);
 		}
 	}
-	rpl_context->rc = req->rc;
-	if (!rpl_context->rc) {
-		kfree(rpl_context);
-		goto err_close;
+
+	/* Allocate an fcall for the reply */
+	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
+	if (!rpl_context) {
+		err = -ENOMEM;
+		goto recv_error;
 	}
+	rpl_context->rc = req->rc;
 
 	/*
 	 * Post a receive buffer for this request. We need to ensure
@@ -455,30 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * outstanding request, so we must keep a count to avoid
 	 * overflowing the RQ.
 	 */
-	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
-		err = post_recv(client, rpl_context);
-		if (err) {
-			kfree(rpl_context->rc);
-			kfree(rpl_context);
-			goto err_close;
-		}
-	} else
-		atomic_dec(&rdma->rq_count);
+	if (down_interruptible(&rdma->rq_sem)) {
+		err = -EINTR;
+		goto recv_error;
+	}
+	err = post_recv(client, rpl_context);
+	if (err) {
+		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+		goto recv_error;
+	}
 
 	/* remove posted receive buffer from request structure */
 	req->rc = NULL;
 
+dont_need_post_recv:
 	/* Post the request */
-	c = kmalloc(sizeof *c, GFP_KERNEL);
-	if (!c)
-		goto err_close;
+	c = kmalloc(sizeof *c, GFP_NOFS);
+	if (!c) {
+		err = -ENOMEM;
+		goto send_error;
+	}
 	c->req = req;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
 				    c->req->tc->sdata, c->req->tc->size,
 				    DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
-		goto error;
+	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+		err = -EIO;
+		goto send_error;
+	}
 
 	sge.addr = c->busa;
 	sge.length = c->req->tc->size;
@@ -492,16 +507,38 @@
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
 
-	if (down_interruptible(&rdma->sq_sem))
-		goto error;
+	if (down_interruptible(&rdma->sq_sem)) {
+		err = -EINTR;
+		goto send_error;
+	}
 
-	return ib_post_send(rdma->qp, &wr, &bad_wr);
+	/* Mark request as `sent' *before* we actually send it,
+	 * because doing if after could erase the REQ_STATUS_RCVD
+	 * status in case of a very fast reply.
+	 */
+	req->status = REQ_STATUS_SENT;
+	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	if (err)
+		goto send_error;
 
- error:
-	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
-	return -EIO;
+	/* Success */
+	return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
+	req->status = REQ_STATUS_ERROR;
+	kfree(c);
+	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
 
- err_close:
+	/* Ach.
+	 *  We did recv_post(), but not send. We have one recv_post in excess.
+	 */
+	atomic_inc(&rdma->excess_rc);
+	return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
+	kfree(rpl_context);
 	spin_lock_irqsave(&rdma->req_lock, flags);
 	if (rdma->state < P9_RDMA_CLOSING) {
 		rdma->state = P9_RDMA_CLOSING;
@@ -546,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
 	spin_lock_init(&rdma->req_lock);
 	init_completion(&rdma->cm_done);
 	sema_init(&rdma->sq_sem, rdma->sq_depth);
-	atomic_set(&rdma->rq_count, 0);
+	sema_init(&rdma->rq_sem, rdma->rq_depth);
+	atomic_set(&rdma->excess_rc, 0);
 
 	return rdma;
 }
 
-/* its not clear to me we can do anything after send has been posted */
 static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
 {
+	/* Nothing to do here.
+	 * We will take care of it (if we have to) in rdma_cancelled()
+	 */
 	return 1;
 }
 
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+	struct p9_trans_rdma *rdma = client->trans;
+	atomic_inc(&rdma->excess_rc);
+	return 0;
+}
+
 /**
  * trans_create_rdma - Transport method for creating atransport instance
  * @client: client instance
@@ -584,7 +634,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 		return -ENOMEM;
 
 	/* Create the RDMA CM ID */
-	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
+	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
+				     IB_QPT_RC);
 	if (IS_ERR(rdma->cm_id))
 		goto error;
 
@@ -689,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = {
 	.close = rdma_close,
 	.request = rdma_request,
 	.cancel = rdma_cancel,
+	.cancelled = rdma_cancelled,
 };
 
 /**
