diff options
Diffstat (limited to 'drivers/infiniband/hw/ehca/ehca_reqs.c')
| -rw-r--r-- | drivers/infiniband/hw/ehca/ehca_reqs.c | 550 |
1 files changed, 425 insertions, 125 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index b46bda1bf85..47f94984353 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -3,8 +3,9 @@ * * post_send/recv, poll_cq, req_notify * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * Reinhard Ernst <rernst@de.ibm.com> * * Copyright (c) 2005 IBM Corporation @@ -41,7 +42,6 @@ */ -#include <asm-powerpc/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -49,9 +49,28 @@ #include "hcp_if.h" #include "hipz_fns.h" +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -65,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -77,9 +96,10 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, recv_wr->sg_list[cnt_ds].length; } - if (ehca_debug_level) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + if (ehca_debug_level >= 3) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); } return 0; @@ -98,7 +118,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; struct ib_sge *sge = send_wr->sg_list; ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x",idx, send_wr->wr_id, + "send_flags=%x opcode=%x", idx, send_wr->wr_id, send_wr->num_sge, send_wr->send_flags, send_wr->opcode); if (mad_hdr) { @@ -115,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *) abs_to_virt(sge->addr); + u8 *data = __va(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -133,12 +153,15 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr) + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -151,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -174,13 +201,17 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->wr_flag = 0; - if (send_wr->send_flags & IB_SEND_SIGNALED) + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } if (send_wr->opcode == IB_WR_SEND_WITH_IMM || send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data); + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; } @@ -197,10 +228,14 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; wqe_p->local_ee_context_qkey = remote_qkey; - if (!send_wr->wr.ud.ah) { + if (unlikely(!send_wr->wr.ud.ah)) { ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); return -EINVAL; } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); wqe_p->u.ud_av.ud_av = my_av->av; @@ -233,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intentional here */ case IB_QPT_RC: /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_adress = + wqe_p->u.nud.remote_virtual_address = send_wr->wr.rdma.remote_addr; wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; @@ -253,6 +288,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, } /* eof idx */ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + break; default: @@ -260,7 +304,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, return -EINVAL; } - if (ehca_debug_level) { + if (ehca_debug_level >= 3) { ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); } @@ -353,125 +397,214 @@ static inline void map_ib_wc_status(u32 cqe_status, *wc_status = IB_WC_SUCCESS; } +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. + */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ib_send_wr *cur_send_wr; - struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; + + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); + + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } /* loop processes list of send reqs */ - for (cur_send_wr = send_wr; cur_send_wr != NULL; - cur_send_wr = cur_send_wr->next) { - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); - } - goto post_send_exit0; - } - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr); - /* - * if something failed, - * reset the free entry pointer to the start value - */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - *bad_send_wr = cur_send_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); - } goto post_send_exit0; } wqe_cnt++; - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); - } /* eof for cur_send_wr */ + send_wr = send_wr->next; + } post_send_exit0: - /* UNLOCK the QUEUE */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; return ret; } -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) { - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + u32 rq_map_idx; + unsigned long flags; + struct ehca_qmap_entry *qmap_entry; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + ret = -ENODEV; + goto out; + } /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_r, flags); - /* loop processes list of send reqs */ - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; - cur_recv_wr = cur_recv_wr->next) { + /* loop processes list of recv reqs */ + while (recv_wr) { u64 start_offset = my_qp->ipz_rqueue.current_q_offset; /* get pointer next to free WQE */ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_recv_wr) - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); - } + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value */ if (unlikely(ret)) { my_qp->ipz_rqueue.current_q_offset = start_offset; - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); - } + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + wqe_cnt++; - ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); - } /* eof for cur_recv_wr */ + recv_wr = recv_wr->next; + } /* eof for recv_wr */ post_recv_exit0: - spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + return ret; } +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + /* * ib_wc_opcode table converts ehca wc opcode to ib * Since we use zero to indicate invalid opcode, the actual ib opcode must @@ -491,19 +624,23 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; - int cqe_count = 0; + struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; -poll_cq_one_read_cqe: +repoll: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { ret = -EAGAIN; - ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p " - "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret); - goto poll_cq_one_exit0; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; } /* prevents loads being reordered across this point */ @@ -511,9 +648,11 @@ poll_cq_one_read_cqe: cqe_count++; if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); + struct ehca_qp *qp; int purgeflag; - unsigned long spl_flags; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -521,17 +660,17 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ - goto poll_cq_one_read_cqe; + goto repoll; } - spin_lock_irqsave(&qp->spinlock_s, spl_flags); + spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { - ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " - "src_qp=%x", + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level) + if (ehca_debug_level >= 2) ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", cqe->local_qp_number, cqe->remote_qp_number); @@ -540,15 +679,17 @@ poll_cq_one_read_cqe: * that caused sqe and turn off purge flag */ qp->sqerr_purgeflag = 0; - goto poll_cq_one_read_cqe; + goto repoll; } } - /* tracing cqe */ - if (ehca_debug_level) { + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { ehca_dbg(cq->device, - "Received COMPLETION ehca_cq=%p cq_num=%x -----", - my_cq, my_cq->cq_number); + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); ehca_dbg(cq->device, @@ -556,8 +697,62 @@ poll_cq_one_read_cqe: my_cq, my_cq->cq_number); } - /* we got a completion! */ - wc->wr_id = cqe->work_request_id; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -569,34 +764,30 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! */ - goto poll_cq_one_exit0; + goto repoll; } + /* eval ib_wc_status */ - if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) { + if (unlikely(is_error)) { /* complete with errors */ map_ib_wc_status(cqe->status, &wc->status); wc->vendor_err = wc->status; } else wc->status = IB_WC_SUCCESS; - wc->qp_num = cqe->local_qp_number; wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; - wc->wc_flags = cqe->w_completion_flags; - wc->imm_data = cpu_to_be32(cqe->immediate_data); + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. + */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (wc->status != IB_WC_SUCCESS) - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " - "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " - "cqe=%p", my_cq, my_cq->cq_number, cqe->optype, - cqe->status, cqe->local_qp_number, - cqe->remote_qp_number, cqe->work_request_id, cqe); - poll_cq_one_exit0: if (cqe_count > 0) hipz_update_feca(my_cq, cqe_count); @@ -604,13 +795,89 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; - unsigned long spl_flags; + unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -619,26 +886,52 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) goto poll_cq_exit0; } - spin_lock_irqsave(&my_cq->spinlock, spl_flags); - for (nr = 0; nr < num_entries; nr++) { + spin_lock_irqsave(&my_cq->spinlock, flags); + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + entries_left -= nr; + + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; } -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int ret = 0; - switch (cq_notify) { + switch (notify_flags & IB_CQ_SOLICITED_MASK) { case IB_CQ_SOLICITED: hipz_set_cqx_n0(my_cq, 1); break; @@ -649,5 +942,12 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify) return -EINVAL; } - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; } |
