Diffstat (limited to 'drivers/infiniband/hw/cxgb4/cq.c')
-rw-r--r--	drivers/infiniband/hw/cxgb4/cq.c	360
1 file changed, 239 insertions, 121 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 8d8f8add6fc..c04292c950f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -134,7 +134,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			V_FW_RI_RES_WR_IQANUS(0) |
 			V_FW_RI_RES_WR_IQANUD(1) |
 			F_FW_RI_RES_WR_IQANDST |
-			V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids));
+			V_FW_RI_RES_WR_IQANDSTINDEX(
+				rdev->lldi.ciq_ids[cq->vector]));
 	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
 			F_FW_RI_RES_WR_IQDROPRSS |
 			V_FW_RI_RES_WR_IQPCIECH(2) |
@@ -185,7 +186,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
 				 V_CQE_OPCODE(FW_RI_SEND) |
 				 V_CQE_TYPE(0) |
 				 V_CQE_SWCQE(1) |
-				 V_CQE_QPID(wq->rq.qid));
+				 V_CQE_QPID(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
@@ -225,43 +226,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
 	t4_swcq_produce(cq);
 }
 
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+static void advance_oldest_read(struct t4_wq *wq);
+
+int c4iw_flush_sq(struct c4iw_qp *qhp)
 {
 	int flushed = 0;
-	struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
-	int in_use = wq->sq.in_use - count;
+	struct t4_wq *wq = &qhp->wq;
+	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+	struct t4_cq *cq = &chp->cq;
+	int idx;
+	struct t4_swsqe *swsqe;
 
-	BUG_ON(in_use < 0);
-	while (in_use--) {
-		swsqe->signaled = 0;
+	if (wq->sq.flush_cidx == -1)
+		wq->sq.flush_cidx = wq->sq.cidx;
+	idx = wq->sq.flush_cidx;
+	BUG_ON(idx >= wq->sq.size);
+	while (idx != wq->sq.pidx) {
+		swsqe = &wq->sq.sw_sq[idx];
+		BUG_ON(swsqe->flushed);
+		swsqe->flushed = 1;
 		insert_sq_cqe(wq, cq, swsqe);
-		swsqe++;
-		if (swsqe == (wq->sq.sw_sq + wq->sq.size))
-			swsqe = wq->sq.sw_sq;
+		if (wq->sq.oldest_read == swsqe) {
+			BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+			advance_oldest_read(wq);
+		}
 		flushed++;
+		if (++idx == wq->sq.size)
+			idx = 0;
 	}
+	wq->sq.flush_cidx += flushed;
+	if (wq->sq.flush_cidx >= wq->sq.size)
+		wq->sq.flush_cidx -= wq->sq.size;
 	return flushed;
 }
 
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+	struct t4_swsqe *swsqe;
+	int cidx;
+
+	if (wq->sq.flush_cidx == -1)
+		wq->sq.flush_cidx = wq->sq.cidx;
+	cidx = wq->sq.flush_cidx;
+	BUG_ON(cidx > wq->sq.size);
+
+	while (cidx != wq->sq.pidx) {
+		swsqe = &wq->sq.sw_sq[cidx];
+		if (!swsqe->signaled) {
+			if (++cidx == wq->sq.size)
+				cidx = 0;
+		} else if (swsqe->complete) {
+
+			BUG_ON(swsqe->flushed);
+
+			/*
+			 * Insert this completed cqe into the swcq.
+			 */
+			PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+					__func__, cidx, cq->sw_pidx);
+			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+			t4_swcq_produce(cq);
+			swsqe->flushed = 1;
+			if (++cidx == wq->sq.size)
+				cidx = 0;
+			wq->sq.flush_cidx = cidx;
+		} else
+			break;
+	}
+}
+
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+		struct t4_cqe *read_cqe)
+{
+	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+			V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+			V_CQE_OPCODE(FW_RI_READ_REQ) |
+			V_CQE_TYPE(1));
+	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
+static void advance_oldest_read(struct t4_wq *wq)
+{
+
+	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+	if (rptr == wq->sq.size)
+		rptr = 0;
+	while (rptr != wq->sq.pidx) {
+		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+			return;
+		if (++rptr == wq->sq.size)
+			rptr = 0;
+	}
+	wq->sq.oldest_read = NULL;
+}
+
 /*
  * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order and/or completions that complete
+ * prior unsignalled WRs.
  */
-void c4iw_flush_hw_cq(struct t4_cq *cq)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
 {
-	struct t4_cqe *cqe = NULL, *swcqe;
+	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+	struct c4iw_qp *qhp;
+	struct t4_swsqe *swsqe;
 	int ret;
 
-	PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
-	ret = t4_next_hw_cqe(cq, &cqe);
+	PDBG("%s  cqid 0x%x\n", __func__, chp->cq.cqid);
+	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+	/*
+	 * This logic is similar to poll_cq(), but not quite the same
+	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
+	 * also do any translation magic that poll_cq() normally does.
+	 */
 	while (!ret) {
-		PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n",
-		     __func__, cq->cidx, cq->sw_pidx);
-		swcqe = &cq->sw_queue[cq->sw_pidx];
-		*swcqe = *cqe;
-		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
-		t4_swcq_produce(cq);
-		t4_hwcq_consume(cq);
-		ret = t4_next_hw_cqe(cq, &cqe);
+		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+		/*
+		 * drop CQEs with no associated QP
+		 */
+		if (qhp == NULL)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+			/* If we have reached here because of async
+			 * event or other error, and have egress error
+			 * then drop
+			 */
+			if (CQE_TYPE(hw_cqe) == 1)
+				goto next_cqe;
+
+			/* drop peer2peer RTR reads.
+			 */
+			if (CQE_WRID_STAG(hw_cqe) == 1)
+				goto next_cqe;
+
+			/*
+			 * Eat completions for unsignaled read WRs.
+			 */
+			if (!qhp->wq.sq.oldest_read->signaled) {
+				advance_oldest_read(&qhp->wq);
+				goto next_cqe;
+			}
+
+			/*
+			 * Don't write to the HWCQ, create a new read req CQE
+			 * in local memory and move it into the swcq.
+			 */
+			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+			hw_cqe = &read_cqe;
+			advance_oldest_read(&qhp->wq);
+		}
+
+		/* if its a SQ completion, then do the magic to move all the
+		 * unsignaled and now in-order completions into the swcq.
+		 */
+		if (SQ_TYPE(hw_cqe)) {
+			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+			swsqe->cqe = *hw_cqe;
+			swsqe->complete = 1;
+			flush_completed_wrs(&qhp->wq, &chp->cq);
+		} else {
+			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+			*swcqe = *hw_cqe;
+			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+			t4_swcq_produce(&chp->cq);
+		}
+next_cqe:
+		t4_hwcq_consume(&chp->cq);
+		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
 	}
 }
 
@@ -281,25 +425,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
 	return 1;
 }
 
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
-	struct t4_cqe *cqe;
-	u32 ptr;
-
-	*count = 0;
-	ptr = cq->sw_cidx;
-	while (ptr != cq->sw_pidx) {
-		cqe = &cq->sw_queue[ptr];
-		if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
-				      wq->sq.oldest_read)) &&
-		    (CQE_QPID(cqe) == wq->sq.qid))
-			(*count)++;
-		if (++ptr == cq->size)
-			ptr = 0;
-	}
-	PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 {
 	struct t4_cqe *cqe;
@@ -311,7 +436,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 	while (ptr != cq->sw_pidx) {
 		cqe = &cq->sw_queue[ptr];
 		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
-		    (CQE_QPID(cqe) == wq->rq.qid) && cqe_completes_wr(cqe, wq))
+		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
 			(*count)++;
 		if (++ptr == cq->size)
 			ptr = 0;
@@ -319,70 +444,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 	PDBG("%s cq %p count %d\n", __func__, cq, *count);
 }
 
-static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
-{
-	struct t4_swsqe *swsqe;
-	u16 ptr = wq->sq.cidx;
-	int count = wq->sq.in_use;
-	int unsignaled = 0;
-
-	swsqe = &wq->sq.sw_sq[ptr];
-	while (count--)
-		if (!swsqe->signaled) {
-			if (++ptr == wq->sq.size)
-				ptr = 0;
-			swsqe = &wq->sq.sw_sq[ptr];
-			unsignaled++;
-		} else if (swsqe->complete) {
-
-			/*
-			 * Insert this completed cqe into the swcq.
-			 */
-			PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
-			     __func__, ptr, cq->sw_pidx);
-			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
-			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
-			t4_swcq_produce(cq);
-			swsqe->signaled = 0;
-			wq->sq.in_use -= unsignaled;
-			break;
-		} else
-			break;
-}
-
-static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
-				struct t4_cqe *read_cqe)
-{
-	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
-	read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
-	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
-				 V_CQE_SWCQE(SW_CQE(hw_cqe)) |
-				 V_CQE_OPCODE(FW_RI_READ_REQ) |
-				 V_CQE_TYPE(1));
-	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t4_wq *wq)
-{
-
-	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
-
-	if (rptr == wq->sq.size)
-		rptr = 0;
-	while (rptr != wq->sq.pidx) {
-		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
-
-		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
-			return;
-		if (++rptr == wq->sq.size)
-			rptr = 0;
-	}
-	wq->sq.oldest_read = NULL;
-}
-
 /*
  * poll_cq
  *
@@ -427,6 +488,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 	}
 
 	/*
+	* skip hw cqe's if the wq is flushed.
+	*/
+	if (wq->flushed && !SW_CQE(hw_cqe)) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
+	 * skip TERMINATE cqes...
+	 */
+	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
 	 * Gotta tweak READ completions:
 	 *	1) the cqe doesn't contain the sq_wptr from the wr.
 	 *	2) opcode not reflected from the wr.
@@ -435,12 +512,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 	 */
 	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
 
-		/*
-		 * If this is an unsolicited read response, then the read
+		/* If we have reached here because of async
+		 * event or other error, and have egress error
+		 * then drop
+		 */
+		if (CQE_TYPE(hw_cqe) == 1) {
+			if (CQE_STATUS(hw_cqe))
+				t4_set_wq_in_error(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/* If this is an unsolicited read response, then the read
 		 * was generated by the kernel driver as part of peer-2-peer
 		 * connection setup.  So ignore the completion.
 		 */
-		if (!wq->sq.oldest_read) {
+		if (CQE_WRID_STAG(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
 				t4_set_wq_in_error(wq);
 			ret = -EAGAIN;
@@ -448,6 +535,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		}
 
 		/*
+		 * Eat completions for unsignaled read WRs.
+		 */
+		if (!wq->sq.oldest_read->signaled) {
+			advance_oldest_read(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/*
 		 * Don't write to the HWCQ, so create a new read req CQE
 		 * in local memory.
 		 */
@@ -457,14 +553,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 	}
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
-		*cqe_flushed = t4_wq_in_error(wq);
+		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
 		t4_set_wq_in_error(wq);
-		goto proc_cqe;
-	}
-
-	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
-		ret = -EAGAIN;
-		goto skip_cqe;
 	}
 
 	/*
@@ -523,7 +613,24 @@ proc_cqe:
 	 * completion.
 	 */
 	if (SQ_TYPE(hw_cqe)) {
-		wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
+		int idx = CQE_WRID_SQ_IDX(hw_cqe);
+		BUG_ON(idx >= wq->sq.size);
+
+		/*
+		* Account for any unsignaled completions completed by
+		* this signaled completion.  In this case, cidx points
+		* to the first unsignaled one, and idx points to the
+		* signaled one.  So adjust in_use based on this delta.
+		* if this is not completing any unsigned wrs, then the
+		* delta will be 0. Handle wrapping also!
+		*/
+		if (idx < wq->sq.cidx)
+			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+		else
+			wq->sq.in_use -= idx - wq->sq.cidx;
+		BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
+
+		wq->sq.cidx = (uint16_t)idx;
 		PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
 		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
 		t4_sq_consume(wq);
@@ -532,6 +639,7 @@ proc_cqe:
 		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
 		BUG_ON(t4_rq_empty(wq));
 		t4_rq_consume(wq);
+		goto skip_cqe;
 	}
 
 flush_wq:
@@ -565,7 +673,7 @@ skip_cqe:
 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 {
 	struct c4iw_qp *qhp = NULL;
-	struct t4_cqe cqe = {0, 0}, *rd_cqe;
+	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
 	struct t4_wq *wq;
 	u32 credit = 0;
 	u8 cqe_flushed;
@@ -763,6 +871,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 
 	rhp = to_c4iw_dev(ibdev);
 
+	if (vector >= rhp->rdev.lldi.nciq)
+		return ERR_PTR(-EINVAL);
+
 	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
 	if (!chp)
 		return ERR_PTR(-ENOMEM);
@@ -784,7 +895,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	/*
 	 * Make actual HW queue 2x to avoid cdix_inc overflows.
 	 */
-	hwentries = entries * 2;
+	hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
 
 	/*
 	 * Make HW queue at least 64 entries so GTS updates aren't too
@@ -801,9 +912,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	if (ucontext) {
 		memsize = roundup(memsize, PAGE_SIZE);
 		hwentries = memsize / sizeof *chp->cq.queue;
+		while (hwentries > T4_MAX_IQ_SIZE) {
+			memsize -= PAGE_SIZE;
+			hwentries = memsize / sizeof *chp->cq.queue;
+		}
 	}
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
+	chp->cq.vector = vector;
 
 	ret = create_cq(&rhp->rdev, &chp->cq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -814,6 +930,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	chp->cq.size--;				/* status page */
 	chp->ibcq.cqe = entries - 2;
 	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);
 	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
@@ -838,7 +955,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
 		if (ret)
 			goto err5;
 
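Side note (not part of the patch): the "Handle wrapping also!" arithmetic in the proc_cqe hunk above retires all unsignaled SQ slots between the current consumer index and the index carried in a signaled completion, treating the SQ as a circular ring. The standalone C sketch below shows the same modular-distance idea in isolation; the helper name sq_slots_retired and the example ring sizes are illustrative only and do not appear in the driver.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative helper (not from cq.c): how many SQ slots a signaled
 * completion at 'idx' retires when the consumer index is 'cidx' on a
 * circular ring of 'size' entries.  This mirrors the in_use adjustment
 * in the proc_cqe hunk: delta = (idx - cidx) mod size, where the delta
 * counts the unsignaled WRs that the signaled completion also completes.
 */
static uint16_t sq_slots_retired(uint16_t cidx, uint16_t idx, uint16_t size)
{
	if (idx < cidx)
		return (uint16_t)(size + idx - cidx);	/* index wrapped past the end */
	return (uint16_t)(idx - cidx);			/* no wrap */
}

int main(void)
{
	/* Unsignaled WRs at slots 510, 511 and 0; signaled WR completes at slot 1. */
	assert(sq_slots_retired(510, 1, 512) == 3);
	/* Signaled completion with no unsignaled WRs ahead of it: delta is 0. */
	assert(sq_slots_retired(7, 7, 512) == 0);
	printf("sq_slots_retired examples ok\n");
	return 0;
}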
