From c0af2c057d7ce3f0b260f9380d187a82bb5cab28 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 16 Feb 2011 15:48:25 +0000 Subject: IB/qib: Prevent double completions after a timeout or RNR error There is a double completion associated with error handling for RC QPs. The sequence is: - The do_rc_ack() routine fields an RNR nack and there are 0 rnr_retries configured on the QP. - qib_error_qp() stops the pending timer - qib_rc_send_complete() is called from sdma_complete() - qib_rc_send_complete() starts the timer because the msb of the psn just completed says an ack is needed. - a bunch of flushes occur as ipoib posts WQEs to an error'ed QP - rc_timeout() calls qib_restart_rc() - qib_restart_rc() calls qib_send_complete() with a IB_WC_RETRY_EXC_ERR on a wqe that has already been completed in the past The fix avoids starting the timer since another packet will never arrive. Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_rc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 31e09b05a1a..eca0c41f122 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1005,7 +1005,8 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) * there are still requests that haven't been acked. */ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && - !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN))) + !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) && + (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) start_timer(qp); while (qp->s_last != qp->s_acked) { -- cgit v1.2.3-18-g5258