diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2007-07-25 11:08:28 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-10-09 20:05:49 -0700 |
commit | 4ee97180ac76deb5a715ac45b7d7516e6ee82ae7 (patch) | |
tree | 6683d1c34d3f36271a9d8275a645ce67222ffc56 /drivers/infiniband/hw | |
parent | 210d6ca3db058cd1d6e6fd235ee3e25d6ac221cd (diff) |
IB/ipath: Change UD to queue work requests like RC & UC
The code to post UD sends tried to process work requests at the time
ib_post_send() is called without using a WQE queue. This was fine as
long as HW resources were available for sending a packet. This patch
changes UD to be handled more like RC and UC and shares more code.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_qp.c | 11 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_rc.c | 61 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ruc.c | 308 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_uc.c | 77 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_ud.c | 372 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.c | 241 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_verbs.h | 35 |
7 files changed, 494 insertions, 611 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 1324b35ff1f..a8c4a6b03d7 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -338,6 +338,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_busy = 0; qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; + qp->s_wqe = NULL; qp->s_psn = 0; qp->r_psn = 0; qp->r_msn = 0; @@ -751,6 +752,9 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, switch (init_attr->qp_type) { case IB_QPT_UC: case IB_QPT_RC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: sz = sizeof(struct ipath_sge) * init_attr->cap.max_send_sge + sizeof(struct ipath_swqe); @@ -759,10 +763,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, ret = ERR_PTR(-ENOMEM); goto bail; } - /* FALLTHROUGH */ - case IB_QPT_UD: - case IB_QPT_SMI: - case IB_QPT_GSI: sz = sizeof(*qp); if (init_attr->srq) { struct ipath_srq *srq = to_isrq(init_attr->srq); @@ -805,8 +805,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); init_waitqueue_head(&qp->wait); - tasklet_init(&qp->s_task, ipath_do_ruc_send, - (unsigned long)qp); + tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); INIT_LIST_HEAD(&qp->piowait); INIT_LIST_HEAD(&qp->timerwait); qp->state = IB_QPS_RESET; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 46744ea2bab..53259daeb4f 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -81,9 +81,8 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. */ -static int ipath_make_rc_ack(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu, u32 *bth0p, u32 *bth2p) +static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp, + struct ipath_other_headers *ohdr, u32 pmtu) { struct ipath_ack_entry *e; u32 hwords; @@ -192,8 +191,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, } qp->s_hdrwords = hwords; qp->s_cur_size = len; - *bth0p = bth0 | (1 << 22); /* Set M bit */ - *bth2p = bth2; + ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2); return 1; bail: @@ -203,32 +201,39 @@ bail: /** * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP - * @ohdr: a pointer to the IB header being constructed - * @pmtu: the path MTU - * @bth0p: pointer to the BTH opcode word - * @bth2p: pointer to the BTH PSN word * * Return 1 if constructed; otherwise, return 0. - * Note the QP s_lock must be held and interrupts disabled. */ -int ipath_make_rc_req(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu, u32 *bth0p, u32 *bth2p) +int ipath_make_rc_req(struct ipath_qp *qp) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); + struct ipath_other_headers *ohdr; struct ipath_sge_state *ss; struct ipath_swqe *wqe; u32 hwords; u32 len; u32 bth0; u32 bth2; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); char newreq; + unsigned long flags; + int ret = 0; + + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + + /* + * The lock is needed to synchronize between the sending tasklet, + * the receive interrupt handler, and timeout resends. + */ + spin_lock_irqsave(&qp->s_lock, flags); /* Sending responses has higher priority over sending requests. */ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || (qp->s_flags & IPATH_S_ACK_PENDING) || qp->s_ack_state != OP(ACKNOWLEDGE)) && - ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) + ipath_make_rc_ack(dev, qp, ohdr, pmtu)) goto done; if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || @@ -560,13 +565,12 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_hdrwords = hwords; qp->s_cur_sge = ss; qp->s_cur_size = len; - *bth0p = bth0 | (qp->s_state << 24); - *bth2p = bth2; + ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: - return 1; - + ret = 1; bail: - return 0; + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; } /** @@ -627,7 +631,7 @@ static void send_rc_ack(struct ipath_qp *qp) /* * If we can send the ACK, clear the ACK state. */ - if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { + if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) { dev->n_unicast_xmit++; goto done; } @@ -757,7 +761,9 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) wc->vendor_err = 0; wc->byte_len = 0; wc->qp = &qp->ibqp; + wc->imm_data = 0; wc->src_qp = qp->remote_qpn; + wc->wc_flags = 0; wc->pkey_index = 0; wc->slid = qp->remote_ah_attr.dlid; wc->sl = qp->remote_ah_attr.sl; @@ -1041,7 +1047,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, wc.vendor_err = 0; wc.byte_len = 0; wc.qp = &qp->ibqp; + wc.imm_data = 0; wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; @@ -1454,6 +1462,19 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, goto send_ack; } /* + * Try to send a simple ACK to work around a Mellanox bug + * which doesn't accept a RDMA read response or atomic + * response as an ACK for earlier SENDs or RDMA writes. + */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue && + !(qp->s_flags & IPATH_S_ACK_PENDING) && + qp->s_ack_state == OP(ACKNOWLEDGE)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; + goto send_ack; + } + /* * Resend the RDMA read or atomic op which * ACKs this duplicate request. */ diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index c69c2523944..4b6b7ee8e5c 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -31,6 +31,8 @@ * SOFTWARE. */ +#include <linux/spinlock.h> + #include "ipath_verbs.h" #include "ipath_kernel.h" @@ -106,27 +108,30 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp) spin_unlock_irqrestore(&dev->pending_lock, flags); } -static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe) +/** + * ipath_init_sge - Validate a RWQE and fill in the SGE state + * @qp: the QP + * + * Return 1 if OK. + */ +int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, + u32 *lengthp, struct ipath_sge_state *ss) { - int user = to_ipd(qp->ibqp.pd)->user; int i, j, ret; struct ib_wc wc; - qp->r_len = 0; + *lengthp = 0; for (i = j = 0; i < wqe->num_sge; i++) { if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE)) + if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; + *lengthp += wqe->sg_list[i].length; j++; } - qp->r_sge.sge = qp->r_sg_list[0]; - qp->r_sge.sg_list = qp->r_sg_list + 1; - qp->r_sge.num_sge = j; + ss->num_sge = j; ret = 1; goto bail; @@ -172,6 +177,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) u32 tail; int ret; + qp->r_sge.sg_list = qp->r_sg_list; + if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; @@ -199,7 +206,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; - } while (!wr_id_only && !init_sge(qp, wqe)); + } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len, + &qp->r_sge)); qp->r_wr_id = wqe->wr_id; wq->tail = tail; @@ -239,9 +247,9 @@ bail: /** * ipath_ruc_loopback - handle UC and RC lookback requests - * @sqp: the loopback QP + * @sqp: the sending QP * - * This is called from ipath_do_uc_send() or ipath_do_rc_send() to + * This is called from ipath_do_send() to * forward a WQE addressed to the same HCA. * Note that although we are single threaded due to the tasklet, we still * have to protect against post_send(). We don't have to worry about @@ -450,40 +458,18 @@ again: wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - /* XXX do we know which pkey matched? Only needed for GSI. */ wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; wc.dlid_path_bits = 0; + wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - - if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - wc.wr_id = wqe->wr.wr_id; - wc.status = IB_WC_SUCCESS; - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc.vendor_err = 0; - wc.byte_len = wqe->length; - wc.qp = &sqp->ibqp; - wc.src_qp = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0); - } - - /* Update s_last now that we are finished with the SWQE */ - spin_lock_irqsave(&sqp->s_lock, flags); - if (++sqp->s_last >= sqp->s_size) - sqp->s_last = 0; - spin_unlock_irqrestore(&sqp->s_lock, flags); + ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); goto again; done: @@ -491,13 +477,11 @@ done: wake_up(&qp->wait); } -static int want_buffer(struct ipath_devdata *dd) +static void want_buffer(struct ipath_devdata *dd) { set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); - - return 0; } /** @@ -507,14 +491,11 @@ static int want_buffer(struct ipath_devdata *dd) * * Called when we run out of PIO buffers. */ -static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) +static void ipath_no_bufs_available(struct ipath_qp *qp, + struct ipath_ibdev *dev) { unsigned long flags; - spin_lock_irqsave(&dev->pending_lock, flags); - if (list_empty(&qp->piowait)) - list_add_tail(&qp->piowait, &dev->piowait); - spin_unlock_irqrestore(&dev->pending_lock, flags); /* * Note that as soon as want_buffer() is called and * possibly before it returns, ipath_ib_piobufavail() @@ -524,101 +505,14 @@ static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev * We leave the busy flag set so that another post send doesn't * try to put the same QP on the piowait list again. */ + spin_lock_irqsave(&dev->pending_lock, flags); + list_add_tail(&qp->piowait, &dev->piowait); + spin_unlock_irqrestore(&dev->pending_lock, flags); want_buffer(dev->dd); dev->n_piowait++; } /** - * ipath_post_ruc_send - post RC and UC sends - * @qp: the QP to post on - * @wr: the work request to send - */ -int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) -{ - struct ipath_swqe *wqe; - unsigned long flags; - u32 next; - int i, j; - int acc; - int ret; - - /* - * Don't allow RDMA reads or atomic operations on UC or - * undefined operations. - * Make sure buffer is large enough to hold the result for atomics. - */ - if (qp->ibqp.qp_type == IB_QPT_UC) { - if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) { - ret = -EINVAL; - goto bail; - } - } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { - ret = -EINVAL; - goto bail; - } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && - (wr->num_sge == 0 || - wr->sg_list[0].length < sizeof(u64) || - wr->sg_list[0].addr & (sizeof(u64) - 1))) { - ret = -EINVAL; - goto bail; - } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { - ret = -EINVAL; - goto bail; - } - /* IB spec says that num_sge == 0 is OK. */ - if (wr->num_sge > qp->s_max_sge) { - ret = -ENOMEM; - goto bail; - } - spin_lock_irqsave(&qp->s_lock, flags); - next = qp->s_head + 1; - if (next >= qp->s_size) - next = 0; - if (next == qp->s_last) { - spin_unlock_irqrestore(&qp->s_lock, flags); - ret = -EINVAL; - goto bail; - } - - wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; - wqe->ssn = qp->s_ssn++; - wqe->sg_list[0].mr = NULL; - wqe->sg_list[0].vaddr = NULL; - wqe->sg_list[0].length = 0; - wqe->sg_list[0].sge_length = 0; - wqe->length = 0; - acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; - for (i = 0, j = 0; i < wr->num_sge; i++) { - if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) { - spin_unlock_irqrestore(&qp->s_lock, flags); - ret = -EINVAL; - goto bail; - } - if (wr->sg_list[i].length == 0) - continue; - if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i], - acc)) { - spin_unlock_irqrestore(&qp->s_lock, flags); - ret = -EINVAL; - goto bail; - } - wqe->length += wr->sg_list[i].length; - j++; - } - wqe->wr.num_sge = j; - qp->s_head = next; - spin_unlock_irqrestore(&qp->s_lock, flags); - - ipath_do_ruc_send((unsigned long) qp); - - ret = 0; - -bail: - return ret; -} - -/** * ipath_make_grh - construct a GRH header * @dev: a pointer to the ipath device * @hdr: a pointer to the GRH header being constructed @@ -648,39 +542,66 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } +void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 bth0, u32 bth2) +{ + u16 lrh0; + u32 nwords; + u32 extra_bytes; + + /* Construct the header. */ + extra_bytes = -qp->s_cur_size & 3; + nwords = (qp->s_cur_size + extra_bytes) >> 2; + lrh0 = IPATH_LRH_BTH; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, + &qp->remote_ah_attr.grh, + qp->s_hdrwords, nwords); + lrh0 = IPATH_LRH_GRH; + } + lrh0 |= qp->remote_ah_attr.sl << 4; + qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); + bth0 |= extra_bytes << 20; + ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(bth2); +} + /** - * ipath_do_ruc_send - perform a send on an RC or UC QP + * ipath_do_send - perform a send on a QP * @data: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP (tasklet). - * Otherwise, after we drop the QP s_lock, two threads could send - * packets out of order. + * Otherwise, two threads could send packets out of order. */ -void ipath_do_ruc_send(unsigned long data) +void ipath_do_send(unsigned long data) { struct ipath_qp *qp = (struct ipath_qp *)data; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned long flags; - u16 lrh0; - u32 nwords; - u32 extra_bytes; - u32 bth0; - u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); - struct ipath_other_headers *ohdr; + int (*make_req)(struct ipath_qp *qp); if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) goto bail; - if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC) && + qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { ipath_ruc_loopback(qp); goto clear; } - ohdr = &qp->s_hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) - ohdr = &qp->s_hdr.u.l.oth; + if (qp->ibqp.qp_type == IB_QPT_RC) + make_req = ipath_make_rc_req; + else if (qp->ibqp.qp_type == IB_QPT_UC) + make_req = ipath_make_uc_req; + else + make_req = ipath_make_ud_req; again: /* Check for a constructed packet to be sent. */ @@ -689,9 +610,8 @@ again: * If no PIO bufs are available, return. An interrupt will * call ipath_ib_piobufavail() when one is available. */ - if (ipath_verbs_send(dev->dd, qp->s_hdrwords, - (u32 *) &qp->s_hdr, qp->s_cur_size, - qp->s_cur_sge)) { + if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, + qp->s_cur_sge, qp->s_cur_size)) { ipath_no_bufs_available(qp, dev); goto bail; } @@ -700,54 +620,42 @@ again: qp->s_hdrwords = 0; } - /* - * The lock is needed to synchronize between setting - * qp->s_ack_state, resend timer, and post_send(). - */ - spin_lock_irqsave(&qp->s_lock, flags); - - if (!((qp->ibqp.qp_type == IB_QPT_RC) ? - ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : - ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { - /* - * Clear the busy bit before unlocking to avoid races with - * adding new work queue items and then failing to process - * them. - */ - clear_bit(IPATH_S_BUSY, &qp->s_busy); - spin_unlock_irqrestore(&qp->s_lock, flags); - goto bail; - } + if (make_req(qp)) + goto again; +clear: + clear_bit(IPATH_S_BUSY, &qp->s_busy); +bail:; +} - spin_unlock_irqrestore(&qp->s_lock, flags); +void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, + enum ib_wc_status status) +{ + u32 last = qp->s_last; - /* Construct the header. */ - extra_bytes = (4 - qp->s_cur_size) & 3; - nwords = (qp->s_cur_size + extra_bytes) >> 2; - lrh0 = IPATH_LRH_BTH; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); - lrh0 = IPATH_LRH_GRH; - } - lrh0 |= qp->remote_ah_attr.sl << 4; - qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); - qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); - qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + - SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); - bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); - bth0 |= extra_bytes << 20; - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[2] = cpu_to_be32(bth2); + if (++last == qp->s_size) + last = 0; + qp->s_last = last; - /* Check for more work to do. */ - goto again; + /* See ch. 11.2.4.1 and 10.7.3.1 */ + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; -clear: - clear_bit(IPATH_S_BUSY, &qp->s_busy); -bail: - return; + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.vendor_err = 0; + wc.byte_len = wqe->length; + wc.imm_data = 0; + wc.qp = &qp->ibqp; + wc.src_qp = 0; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = 0; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } } diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 8380fbc50d2..767beb903c2 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -37,72 +37,40 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_UC_##x -static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, - struct ib_wc *wc) -{ - if (++qp->s_last == qp->s_size) - qp->s_last = 0; - if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || - (wqe->wr.send_flags & IB_SEND_SIGNALED)) { - wc->wr_id = wqe->wr.wr_id; - wc->status = IB_WC_SUCCESS; - wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; - wc->vendor_err = 0; - wc->byte_len = wqe->length; - wc->qp = &qp->ibqp; - wc->src_qp = qp->remote_qpn; - wc->pkey_index = 0; - wc->slid = qp->remote_ah_attr.dlid; - wc->sl = qp->remote_ah_attr.sl; - wc->dlid_path_bits = 0; - wc->port_num = 0; - ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0); - } -} - /** * ipath_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP - * @ohdr: a pointer to the IB header being constructed - * @pmtu: the path MTU - * @bth0p: pointer to the BTH opcode word - * @bth2p: pointer to the BTH PSN word * * Return 1 if constructed; otherwise, return 0. - * Note the QP s_lock must be held and interrupts disabled. */ -int ipath_make_uc_req(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu, u32 *bth0p, u32 *bth2p) +int ipath_make_uc_req(struct ipath_qp *qp) { + struct ipath_other_headers *ohdr; struct ipath_swqe *wqe; u32 hwords; u32 bth0; u32 len; - struct ib_wc wc; + u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + int ret = 0; if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) goto done; + ohdr = &qp->s_hdr.u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr.u.l.oth; + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; bth0 = 1 << 22; /* Set M bit */ /* Get the next send request. */ - wqe = get_swqe_ptr(qp, qp->s_last); + wqe = get_swqe_ptr(qp, qp->s_cur); + qp->s_wqe = NULL; switch (qp->s_state) { default: - /* - * Signal the completion of the last send - * (if there is one). - */ - if (qp->s_last != qp->s_tail) { - complete_last_send(qp, wqe, &wc); - wqe = get_swqe_ptr(qp, qp->s_last); - } - /* Check if send work queue is empty. */ - if (qp->s_tail == qp->s_head) + if (qp->s_cur == qp->s_head) goto done; /* * Start a new request. @@ -131,6 +99,9 @@ int ipath_make_uc_req(struct ipath_qp *qp, } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; break; case IB_WR_RDMA_WRITE: @@ -157,13 +128,14 @@ int ipath_make_uc_req(struct ipath_qp *qp, if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; break; default: goto done; } - if (++qp->s_tail >= qp->s_size) - qp->s_tail = 0; break; case OP(SEND_FIRST): @@ -185,6 +157,9 @@ int ipath_make_uc_req(struct ipath_qp *qp, } if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; break; case OP(RDMA_WRITE_FIRST): @@ -207,18 +182,22 @@ int ipath_make_uc_req(struct ipath_qp *qp, if (wqe->wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; break; } qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; - *bth0p = bth0 | (qp->s_state << 24); - *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK; - return 1; + ipath_make_ruc_header(to_idev(qp->ibqp.device), + qp, ohdr, bth0 | (qp->s_state << 24), + qp->s_next_psn++ & IPATH_PSN_MASK); + ret = 1; done: - return 0; + return ret; } /** diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index f9a3338a5fb..34c4a0a5be3 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -36,68 +36,17 @@ #include "ipath_verbs.h" #include "ipath_kernel.h" -static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe, - u32 *lengthp, struct ipath_sge_state *ss) -{ - int user = to_ipd(qp->ibqp.pd)->user; - int i, j, ret; - struct ib_wc wc; - - *lengthp = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - if ((user && wqe->sg_list[i].lkey == 0) || - !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) - goto bad_lkey; - *lengthp += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ret = 1; - goto bail; - -bad_lkey: - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.vendor_err = 0; - wc.byte_len = 0; - wc.imm_data = 0; - wc.qp = &qp->ibqp; - wc.src_qp = 0; - wc.wc_flags = 0; - wc.pkey_index = 0; - wc.slid = 0; - wc.sl = 0; - wc.dlid_path_bits = 0; - wc.port_num = 0; - /* Signal solicited completion event. */ - ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - /** * ipath_ud_loopback - handle send on loopback QPs - * @sqp: the QP - * @ss: the SGE state - * @length: the length of the data to send - * @wr: the work request - * @wc: the work completion entry + * @sqp: the sending QP + * @swqe: the send work request * - * This is called from ipath_post_ud_send() to forward a WQE addressed + * This is called from ipath_make_ud_req() to forward a WQE addressed * to the same HCA. * Note that the receive interrupt handler may be calling ipath_ud_rcv() * while this is being called. */ -static void ipath_ud_loopback(struct ipath_qp *sqp, - struct ipath_sge_state *ss, - u32 length, struct ib_send_wr *wr, - struct ib_wc *wc) +static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) { struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); struct ipath_qp *qp; @@ -110,12 +59,18 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_rwq *wq; struct ipath_rwqe *wqe; void (*handler)(struct ib_event *, void *); + struct ib_wc wc; u32 tail; u32 rlen; + u32 length; - qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); - if (!qp) - return; + qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); + if (!qp) { + dev->n_pkt_drops++; + goto send_comp; + } + + rsge.sg_list = NULL; /* * Check that the qkey matches (except for QP0, see 9.6.1.4.1). @@ -123,39 +78,34 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, * qkey from the QP context instead of the WR (see 10.2.5). */ if (unlikely(qp->ibqp.qp_num && - ((int) wr->wr.ud.remote_qkey < 0 - ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) { + ((int) swqe->wr.wr.ud.remote_qkey < 0 ? + sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { /* XXX OK to lose a count once in a while. */ dev->qkey_violations++; dev->n_pkt_drops++; - goto done; + goto drop; } /* * A GRH is expected to preceed the data even if not * present on the wire. */ - wc->byte_len = length + sizeof(struct ib_grh); + length = swqe->length; + wc.byte_len = length + sizeof(struct ib_grh); - if (wr->opcode == IB_WR_SEND_WITH_IMM) { - wc->wc_flags = IB_WC_WITH_IMM; - wc->imm_data = wr->imm_data; + if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + wc.wc_flags = IB_WC_WITH_IMM; + wc.imm_data = swqe->wr.imm_data; } else { - wc->wc_flags = 0; - wc->imm_data = 0; + wc.wc_flags = 0; + wc.imm_data = 0; } - if (wr->num_sge > 1) { - rsge.sg_list = kmalloc((wr->num_sge - 1) * - sizeof(struct ipath_sge), - GFP_ATOMIC); - } else - rsge.sg_list = NULL; - /* - * Get the next work request entry to find where to put the data. - * Note that it is safe to drop the lock after changing rq->tail - * since ipath_post_receive() won't fill the empty slot. + * This would be a lot simpler if we could call ipath_get_rwqe() + * but that uses state that the receive interrupt handler uses + * so we would need to lock out receive interrupts while doing + * local loopback. */ if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); @@ -167,32 +117,53 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, rq = &qp->r_rq; } + if (rq->max_sge > 1) { + /* + * XXX We could use GFP_KERNEL if ipath_do_send() + * was always called from the tasklet instead of + * from ipath_post_send(). + */ + rsge.sg_list = kmalloc((rq->max_sge - 1) * + sizeof(struct ipath_sge), + GFP_ATOMIC); + if (!rsge.sg_list) { + dev->n_pkt_drops++; + goto drop; + } + } + + /* + * Get the next work request entry to find where to put the data. + * Note that it is safe to drop the lock after changing rq->tail + * since ipath_post_receive() won't fill the empty slot. + */ spin_lock_irqsave(&rq->lock, flags); wq = rq->wq; tail = wq->tail; - while (1) { - if (unlikely(tail == wq->head)) { - spin_unlock_irqrestore(&rq->lock, flags); - dev->n_pkt_drops++; - goto bail_sge; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); - if (++tail >= rq->size) - tail = 0; - if (init_sge(qp, wqe, &rlen, &rsge)) - break; - wq->tail = tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + if (unlikely(tail == wq->head)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto drop; + } + wqe = get_rwqe_ptr(rq, tail); + if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { + spin_unlock_irqrestore(&rq->lock, flags); + dev->n_pkt_drops++; + goto drop; } /* Silently drop packets which are too big. */ - if (wc->byte_len > rlen) { + if (wc.byte_len > rlen) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; - goto bail_sge; + goto drop; } + if (++tail >= rq->size) + tail = 0; wq->tail = tail; - wc->wr_id = wqe->wr_id; + wc.wr_id = wqe->wr_id; if (handler) { u32 n; @@ -221,13 +192,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, } else spin_unlock_irqrestore(&rq->lock, flags); - ah_attr = &to_iah(wr->wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); - wc->wc_flags |= IB_WC_GRH; + wc.wc_flags |= IB_WC_GRH; } else ipath_skip_sge(&rsge, sizeof(struct ib_grh)); - sge = &ss->sge; + sge = swqe->sg_list; while (length) { u32 len = sge->length; @@ -241,8 |