author    Ralph Campbell <ralph.campbell@qlogic.com>  2008-05-13 11:41:29 -0700
committer Roland Dreier <rolandd@cisco.com>  2008-05-13 11:41:29 -0700
commit    e509be898d8937634437caa474b57ac12795e5bc (patch)
tree      f99b0e3965ee0eaf89acccdba382689719881a29
parent    53dc1ca194c062aa9771e194047f27ec1ca592df (diff)
IB/ipath: Fix many locking issues when switching to error state
The send DMA hardware queue voided a number of prior assumptions about when a send is complete, which led to completions being generated out of order. There were also a number of locking issues when switching the QP to the error or reset states, and we now implement the IB_QPS_SQD state.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c        | 183
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c        | 151
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c       | 168
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c        |  57
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c        |  66
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_sdma.h |   2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c     | 174
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h     |  57
8 files changed, 554 insertions(+), 304 deletions(-)
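[Editor's note] The core of the fix, visible throughout the hunks below, is that in-flight send DMA descriptors are now counted in an atomic (s_dma_busy) paired with a waitqueue (wait_dma), so state transitions can drain the hardware before touching send state. A minimal sketch of the pattern; only s_dma_busy, wait_dma, and s_task come from the patch, the submit helper is illustrative:

    /* submit side: account for the descriptor before hardware sees it */
    atomic_inc(&qp->s_dma_busy);
    submit_sdma_descriptor(qp, tx);        /* hypothetical helper */

    /* completion interrupt: the last descriptor wakes any waiter */
    if (atomic_dec_and_test(&qp->s_dma_busy))
        wake_up(&qp->wait_dma);

    /* reset/destroy (as in ipath_modify_qp() and ipath_destroy_qp()):
     * stop the tasklet, then sleep until the hardware queue is idle */
    tasklet_kill(&qp->s_task);
    wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));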
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 6f98632877e..4715911101e 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
struct ipath_qp *q, **qpp;
unsigned long flags;
- int fnd = 0;
spin_lock_irqsave(&qpt->lock, flags);
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
*qpp = qp->next;
qp->next = NULL;
atomic_dec(&qp->refcount);
- fnd = 1;
break;
}
}
spin_unlock_irqrestore(&qpt->lock, flags);
-
- if (!fnd)
- return;
-
- free_qpn(qpt, qp->ibqp.qp_num);
-
- wait_event(qp->wait, !atomic_read(&qp->refcount));
}
/**
- * ipath_free_all_qps - remove all QPs from the table
+ * ipath_free_all_qps - check for QPs still in use
* @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
*/
-void ipath_free_all_qps(struct ipath_qp_table *qpt)
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
{
unsigned long flags;
- struct ipath_qp *qp, *nqp;
- u32 n;
+ struct ipath_qp *qp;
+ u32 n, qp_inuse = 0;
+ spin_lock_irqsave(&qpt->lock, flags);
for (n = 0; n < qpt->max; n++) {
- spin_lock_irqsave(&qpt->lock, flags);
qp = qpt->table[n];
qpt->table[n] = NULL;
- spin_unlock_irqrestore(&qpt->lock, flags);
-
- while (qp) {
- nqp = qp->next;
- free_qpn(qpt, qp->ibqp.qp_num);
- if (!atomic_dec_and_test(&qp->refcount) ||
- !ipath_destroy_qp(&qp->ibqp))
- ipath_dbg("QP memory leak!\n");
- qp = nqp;
- }
+
+ for (; qp; qp = qp->next)
+ qp_inuse++;
}
+ spin_unlock_irqrestore(&qpt->lock, flags);
- for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
+ for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
if (qpt->map[n].page)
- free_page((unsigned long)qpt->map[n].page);
- }
+ free_page((unsigned long) qpt->map[n].page);
+ return qp_inuse;
}
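[Editor's note] ipath_free_all_qps() now only detects leaks and frees the QPN map pages; freeing each QP is the job of ipath_destroy_qp(). A hypothetical call site (the real caller is outside this section) could use the new return value like so:

    unsigned qps_inuse;

    qps_inuse = ipath_free_all_qps(&dev->qp_table);
    if (qps_inuse)
        ipath_dev_err(dd, "QP memory leak! %u QPs still in use\n",
                      qps_inuse);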
/**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
- qp->s_busy = 0;
+ atomic_set(&qp->s_dma_busy, 0);
qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
qp->s_hdrwords = 0;
qp->s_wqe = NULL;
qp->s_pkt_delay = 0;
+ qp->s_draining = 0;
qp->s_psn = 0;
qp->r_psn = 0;
qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
}
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
- qp->r_wrid_valid = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
- qp->s_wait_credit = 0;
memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
qp->r_head_ack_queue = 0;
qp->s_tail_ack_queue = 0;
@@ -370,7 +359,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
}
- qp->r_reuse_sge = 0;
}
/**
@@ -402,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
- wc.vendor_err = 0;
- wc.byte_len = 0;
- wc.imm_data = 0;
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ ipath_schedule_send(qp);
+
+ memset(&wc, 0, sizeof(wc));
wc.qp = &qp->ibqp;
- wc.src_qp = 0;
- wc.wc_flags = 0;
- wc.pkey_index = 0;
- wc.slid = 0;
- wc.sl = 0;
- wc.dlid_path_bits = 0;
- wc.port_num = 0;
- if (qp->r_wrid_valid) {
- qp->r_wrid_valid = 0;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
wc.wr_id = qp->r_wr_id;
- wc.opcode = IB_WC_RECV;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
}
wc.status = IB_WC_WR_FLUSH_ERR;
- while (qp->s_last != qp->s_head) {
- struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-
- wc.wr_id = wqe->wr.wr_id;
- wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
- if (++qp->s_last >= qp->s_size)
- qp->s_last = 0;
- ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
- }
- qp->s_cur = qp->s_tail = qp->s_head;
- qp->s_hdrwords = 0;
- qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-
if (qp->r_rq.wq) {
struct ipath_rwq *wq;
u32 head;
@@ -450,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
tail = wq->tail;
if (tail >= qp->r_rq.size)
tail = 0;
- wc.opcode = IB_WC_RECV;
while (tail != head) {
wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
if (++tail >= qp->r_rq.size)
@@ -482,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_qp *qp = to_iqp(ibqp);
enum ib_qp_state cur_state, new_state;
- unsigned long flags;
int lastwqe = 0;
int ret;
- spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock_irq(&qp->s_lock);
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
@@ -539,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
switch (new_state) {
case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ spin_lock_irq(&qp->s_lock);
+ }
ipath_reset_qp(qp, ibqp->qp_type);
break;
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
case IB_QPS_ERR:
lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
break;
default:
+ qp->state = new_state;
break;
-
}
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -601,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
qp->s_max_rd_atomic = attr->max_rd_atomic;
- qp->state = new_state;
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
if (lastwqe) {
struct ib_event ev;
@@ -616,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto bail;
inval:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock_irq(&qp->s_lock);
ret = -EINVAL;
bail:
@@ -647,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->pkey_index = qp->s_pkey_index;
attr->alt_pkey_index = 0;
attr->en_sqd_async_notify = 0;
- attr->sq_draining = 0;
+ attr->sq_draining = qp->s_draining;
attr->max_rd_atomic = qp->s_max_rd_atomic;
attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
attr->min_rnr_timer = qp->r_min_rnr_timer;
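[Editor's note] Because ipath_query_qp() now reports the real drain status, user space can observe the new SQD handshake through the standard verbs query. A libibverbs sketch (assumes <infiniband/verbs.h>; error handling elided):

    struct ibv_qp_attr attr;
    struct ibv_qp_init_attr init;

    /* after requesting the SQD transition, poll until drained */
    do {
        ibv_query_qp(qp, &attr, IBV_QP_STATE, &init);
    } while (attr.qp_state == IBV_QPS_SQD && attr.sq_draining);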
@@ -837,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
spin_lock_init(&qp->r_rq.lock);
atomic_set(&qp->refcount, 0);
init_waitqueue_head(&qp->wait);
+ init_waitqueue_head(&qp->wait_dma);
tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
INIT_LIST_HEAD(&qp->piowait);
INIT_LIST_HEAD(&qp->timerwait);
@@ -930,6 +924,7 @@ bail_ip:
else
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
bail_qp:
kfree(qp);
bail_swq:
@@ -951,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
- unsigned long flags;
- spin_lock_irqsave(&qp->s_lock, flags);
- qp->state = IB_QPS_ERR;
- spin_unlock_irqrestore(&qp->s_lock, flags);
- spin_lock(&dev->n_qps_lock);
- dev->n_qps_allocated--;
- spin_unlock(&dev->n_qps_lock);
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ } else
+ spin_unlock_irq(&qp->s_lock);
- /* Stop the sending tasklet. */
- tasklet_kill(&qp->s_task);
+ ipath_free_qp(&dev->qp_table, qp);
if (qp->s_tx) {
atomic_dec(&qp->refcount);
if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
kfree(qp->s_tx->txreq.map_addr);
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irq(&dev->pending_lock);
+ qp->s_tx = NULL;
}
- /* Make sure the QP isn't on the timeout list. */
- spin_lock_irqsave(&dev->pending_lock, flags);
- if (!list_empty(&qp->timerwait))
- list_del_init(&qp->timerwait);
- if (!list_empty(&qp->piowait))
- list_del_init(&qp->piowait);
- if (qp->s_tx)
- list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
- /*
- * Make sure that the QP is not in the QPN table so receive
- * interrupts will discard packets for this QP. XXX Also remove QP
- * from multicast table.
- */
- if (atomic_read(&qp->refcount) != 0)
- ipath_free_qp(&dev->qp_table, qp);
+ /* all users cleaned up, mark it available */

+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
if (qp->ip)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1055,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
}
/* Restart sending if it was blocked due to lack of credits. */
- if (qp->s_cur != qp->s_head &&
+ if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+ qp->s_cur != qp->s_head &&
(qp->s_lsn == (u32) -1 ||
ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
qp->s_lsn + 1) <= 0))
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
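[Editor's note] Every tasklet_hi_schedule(&qp->s_task) call above is replaced by ipath_schedule_send(qp). The helper itself lands in ipath_verbs.h (changed by this patch but not shown in this section); given how IPATH_S_BUSY and the new wait flags are used, a plausible shape is the following, offered as an assumption rather than the verbatim code:

    static inline void ipath_schedule_send(struct ipath_qp *qp)
    {
        /* don't poke a QP that is running or deliberately parked */
        if (!(qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)))
            tasklet_hi_schedule(&qp->s_task);
    }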
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index b4b26c3aa61..5b5276a270b 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
u32 bth0;
u32 bth2;
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto bail;
+
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_rc_ack(dev, qp, ohdr, pmtu))
goto done;
- if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout || qp->s_wait_credit)
- goto bail;
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
- /* Limit the number of packets sent without an ACK. */
- if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
- qp->s_wait_credit = 1;
- dev->n_rc_stalls++;
+ /* Leave BUSY set until RNR timeout. */
+ if (qp->s_rnr_timeout) {
+ qp->s_flags |= IPATH_S_WAITING;
goto bail;
}
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
wqe = get_swqe_ptr(qp, qp->s_cur);
switch (qp->s_state) {
default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
/*
* Resend an old request or start a new one.
*
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
wqe->lpsn = wqe->psn;
if (len > pmtu) {
wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (qp->s_lsn != (u32) -1 &&
- ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
goto bail;
+ }
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
ret = 1;
+ goto unlock;
+
bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
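[Editor's note] This done/bail/unlock tail becomes the standard exit discipline for the make-request functions: returning 1 keeps IPATH_S_BUSY set because a packet was built and the tasklet will run again, while bail clears it so a later ipath_schedule_send() can restart the QP. A skeleton of the shape (build_one_packet() is hypothetical):

    static int make_req_skeleton(struct ipath_qp *qp)
    {
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (!build_one_packet(qp))      /* hypothetical work step */
            goto bail;
        ret = 1;                        /* packet built; stay BUSY */
        goto unlock;
    bail:
        qp->s_flags &= ~IPATH_S_BUSY;   /* idle; allow rescheduling */
    unlock:
        spin_unlock_irqrestore(&qp->s_lock, flags);
        return ret;
    }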
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, flags);
+ /* Don't try to send ACKs if the link isn't ACTIVE */
dd = dev->dd;
+ if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+ goto done;
+
piobuf = ipath_getpiobuf(dd, 0, NULL);
if (!piobuf) {
/*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
goto done;
queue_ack:
- dev->n_rc_qacks++;
- qp->s_flags |= IPATH_S_ACK_PENDING;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+ dev->n_rc_qacks++;
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+ }
spin_unlock_irqrestore(&qp->s_lock, flags);
-
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
-
done:
return;
}
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
/*
* Set the state to restart in the middle of a request.
* Don't change the s_sge, s_cur_sge, or s_cur_size.
- * See ipath_do_rc_send().
+ * See ipath_make_rc_req().
*/
switch (opcode) {
case IB_WR_SEND:
@@ -801,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
reset_psn(qp, psn);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
bail:
return;
@@ -809,13 +840,7 @@ bail:
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
- if (qp->s_last_psn != psn) {
- qp->s_last_psn = psn;
- if (qp->s_wait_credit) {
- qp->s_wait_credit = 0;
- tasklet_hi_schedule(&qp->s_task);
- }
- }
+ qp->s_last_psn = psn;
}
/**
@@ -915,14 +940,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
qp->s_num_rd_atomic--;
/* Restart sending task if fence is complete */
- if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
- !qp->s_num_rd_atomic) {
- qp->s_flags &= ~IPATH_S_FENCE_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
- qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
- tasklet_hi_schedule(&qp->s_task);
- }
+ if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) ||
+ qp->s_flags & IPATH_S_RDMAR_PENDING)
+ ipath_schedule_send(qp);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -956,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
} else {
if (++qp->s_last >= qp->s_size)
qp->s_last = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+ qp->s_draining = 0;
if (qp->s_last == qp->s_tail)
break;
wqe = get_swqe_ptr(qp, qp->s_last);
@@ -979,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
*/
if (ipath_cmp24(qp->s_psn, psn) <= 0) {
reset_psn(qp, psn + 1);
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
}
} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
@@ -1018,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK];
ipath_insert_rnr_queue(qp);
+ ipath_schedule_send(qp);
goto bail;
case 3: /* NAK */
@@ -1108,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto ack_done;
+
/* Ignore invalid responses. */
if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
goto ack_done;
@@ -1343,7 +1371,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
psn &= IPATH_PSN_MASK;
e = NULL;
old_req = 1;
+
spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock_done;
+
for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue)
old_req = 0;
@@ -1471,7 +1504,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
break;
}
qp->r_nak_state = 0;
- tasklet_hi_schedule(&qp->s_task);
+ ipath_schedule_send(qp);
unlock_done:
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1503,18 +1536,15 @@ void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
{
- unsigned long flags;
unsigned next;
next = n + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
- spin_lock_irqsave(&qp->s_lock, flags);
if (n == qp->s_tail_ack_queue) {
qp->s_tail_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
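[Editor's note] The lock pair disappears because both callers of ipath_update_ack_queue() (the RDMA read and atomic paths in ipath_rc_rcv() below) now take qp->s_lock themselves before calling in. That contract could be made explicit with a debug assertion, though the patch does not add one:

    /* hypothetical hardening, not in this patch */
    assert_spin_locked(&qp->s_lock);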
/**
@@ -1543,6 +1573,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int diff;
struct ib_reth *reth;
int header_in_data;
+ unsigned long flags;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1690,9 +1721,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
- if (!qp->r_wrid_valid)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
break;
- qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
@@ -1764,9 +1794,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1787,7 +1821,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
rkey, IB_ACCESS_REMOTE_READ);
if (unlikely(!ok))
- goto nack_acc;
+ goto nack_acc_unlck;
/*
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
@@ -1814,13 +1848,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
case OP(COMPARE_SWAP):
@@ -1839,9 +1872,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
next = qp->r_head_ack_queue + 1;
if (next > IPATH_MAX_RDMA_ATOMIC)
next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
if (unlikely(next == qp->s_tail_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
- goto nack_inv;
+ goto nack_inv_unlck;
ipath_update_ack_queue(qp, next);
}
if (!header_in_data)
@@ -1851,13 +1888,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
be32_to_cpu(ateth->vaddr[1]);
if (unlikely(vaddr & (sizeof(u64) - 1)))
- goto nack_inv;
+ goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
- goto nack_acc;
+ goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = be64_to_cpu(ateth->swap_data);
@@ -1874,13 +1911,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
qp->r_psn++;
qp->r_state = opcode;
qp->r_nak_state = 0;
- barrier();
qp->r_head_ack_queue = next;
- /* Call ipath_do_rc_send() in another thread. */
- tasklet_hi_schedule(&qp->s_task);
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
- goto done;
+ goto unlock;
}
default:
@@ -1901,19 +1937,26 @@ rnr_nak:
qp->r_ack_psn = qp->r_psn;
goto send_ack;
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
goto send_ack;
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
send_rc_ack(qp);
+ goto done;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return;
}
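[Editor's note] Note the recurring idiom added to this function: the QP state was tested once on the unlocked fast path, and these hunks re-check it after acquiring s_lock, because ipath_modify_qp() may move the QP to ERR or RESET in the window between the two tests. Distilled:

    spin_lock_irqsave(&qp->s_lock, flags);
    /* re-check: the state may have changed since the unlocked test */
    if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
        goto unlock;
    /* ... safe to update the ack queue and receive state here ... */
    unlock:
        spin_unlock_irqrestore(&qp->s_lock, flags);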
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index c716a03dd39..a4b5521567f 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
+ * Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned long flags;
- spin_lock_irqsave(&dev->pending_lock, flags);
+ /* We already did a spin_lock_irqsave(), so just use spin_lock */
+ spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
- spin_unlock_irqrestore(&dev->pending_lock, flags);
+ spin_unlock(&dev->pending_lock);
}
/**
@@ -185,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
@@ -192,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
- spin_unlock_irqrestore(&rq->lock, flags);
ret = 0;
- goto bail;
+ goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
@@ -207,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wq->tail = tail;
ret = 1;
- qp->r_wrid_valid = 1;
+ set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
@@ -234,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
goto bail;
}
}
+unlock:
spin_unlock_irqrestore(&rq->lock, flags);
-
bail:
return ret;
}
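[Editor's note] Replacing the plain r_wrid_valid flag with an atomic bit in r_aflags makes the completion handoff race-safe: the receive path publishes the WQE with set_bit(), and whichever of the normal completion path or the error flush runs first claims it with test_and_clear_bit(); the loser skips it. The pattern, as used here and in ipath_error_qp() and ipath_rc_rcv():

    /* producer (here, ipath_get_rwqe): publish a valid receive WQE */
    set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);

    /* consumers: exactly one side wins the claim */
    if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
        wc.wr_id = qp->r_wr_id;
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
    }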
@@ -263,35 +268,59 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
atomic64_t *maddr;
enum ib_wc_status send_status;
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
- if (!qp) {
- dev->n_pkt_drops++;
- return;
- }
-again:
spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
- sqp->s_rnr_timeout) {
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
- }
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
- /* Get the next send request. */
- if (sqp->s_last == sqp->s_head) {
- /* Send work queue is empty. */
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- goto done;
+ sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
*/
- wqe = get_swqe_ptr(sqp, sqp->s_last);
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
memset(&wc, 0, sizeof wc);
send_status = IB_WC_SUCCESS;
@@ -396,8 +425,7 @@ again:
sqp->s_len -= len;
}
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
- wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -417,6 +445,8 @@ again:
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
ipath_send_complete(sqp, wqe, send_status);
goto again;
@@ -437,11 +467,12 @@ rnr_nak:
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
- goto unlock;
+ goto clr_busy;
+ sqp->s_flags |= IPATH_S_WAITING;
dev->n_rnr_naks++;
sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
- goto unlock;
+ goto clr_busy;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
@@ -473,17 +504,19 @@ serr:
}
goto done;
}
+clr_busy:
+ sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
- if (atomic_dec_and_test(&qp->refcount))
+ if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
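[Editor's note] Since the requester now completes the WQE with IB_WC_RETRY_EXC_ERR when the loopback peer is gone or not ready, an RC application sees an explicit error completion instead of a silent drop. Observed from user space (libibverbs sketch):

    struct ibv_wc wc;

    if (ibv_poll_cq(cq, 1, &wc) > 0 &&
        wc.status == IBV_WC_RETRY_EXC_ERR)
        fprintf(stderr, "peer QP gone: wr_id %llu\n",
                (unsigned long long) wc.wr_id);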
static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
- qp->ibqp.qp_type == IB_QPT_SMI) {
+ qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
*/
-static void ipath_no_bufs_available(struct ipath_qp *qp,
+static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
+ int ret = 1;
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
- * could be called. If we are still in the tasklet function,
- * tasklet_hi_schedule() will not call us until the next time
- * tasklet_hi_schedule() is called.
- * We leave the busy flag set so that another post send doesn't
- * try to put the same QP on the piowait list again.
+ * could be called. Therefore, put QP on the piowait list before
+ * enabling the PIO avail interrupt.
*/
- spin_lock_irqsave(&dev->pending_lock, flags);
- list_add_tail(&qp->piowait, &dev->piowait);
- spin_unlock_irqrestore(&dev->pending_lock, flags);
- want_buffer(dev->dd, qp);
- dev->n_piowait++;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_piowait++;
+ qp->s_flags |= IPATH_S_WAITING;
+ qp->s_flags &= ~IPATH_S_BUSY;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->piowait))
+ list_add_tail(&qp->piowait, &dev->piowait);
+ spin_unlock(&dev->pending_lock);
+ } else
+ ret = 0;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (ret)
+ want_buffer(dev->dd, qp);
+ return ret;
}
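[Editor's note] The new return value lets the caller tell "parked on the piowait list, retry later" apart from "QP left the send-OK states, flush instead". A hypothetical call site in the make-request path (the real caller is outside these hunks):

    if (no_pio_buffer) {                    /* illustrative condition */
        if (ipath_no_bufs_available(qp, dev))
            goto bail;          /* queued on dev->piowait; retried */
        /* returned 0: flush the work request instead */
        ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
    }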
/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp);
-
- if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
- goto bail;
+ unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
- goto clear;
+ goto bail;
}
if (qp->ibqp.qp_type == IB_QPT_RC)