aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/ehca/ehca_reqs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ehca/ehca_reqs.c')
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c313
1 files changed, 255 insertions, 58 deletions
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index dd9bc68f1c7..47f94984353 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -42,7 +42,6 @@
*/
-#include <asm-powerpc/system.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
@@ -53,9 +52,25 @@
/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+ u64 ret;
+
+ ret = wr_id & ~QMAP_IDX_MASK;
+ ret |= idx & QMAP_IDX_MASK;
+
+ return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+ return wr_id & QMAP_IDX_MASK;
+}
+
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
struct ehca_wqe *wqe_p,
- struct ib_recv_wr *recv_wr)
+ struct ib_recv_wr *recv_wr,
+ u32 rq_map_idx)
{
u8 cnt_ds;
if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = recv_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
wqe_p->nr_of_data_seg = recv_wr->num_sge;
for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -120,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
mad_hdr->attr_mod);
}
for (j = 0; j < send_wr->num_sge; j++) {
- u8 *data = (u8 *)abs_to_virt(sge->addr);
+ u8 *data = __va(sge->addr);
ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
"lkey=%x",
idx, j, data, sge->length, sge->lkey);
@@ -139,12 +154,14 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
static inline int ehca_write_swqe(struct ehca_qp *qp,
struct ehca_wqe *wqe_p,
const struct ib_send_wr *send_wr,
+ u32 sq_map_idx,
int hidden)
{
u32 idx;
u64 dma_length;
struct ehca_av *my_av;
u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -157,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = send_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
+
+ qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
@@ -182,8 +203,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
if ((send_wr->send_flags & IB_SEND_SIGNALED ||
qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
- && !hidden)
+ && !hidden) {
wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ qmap_entry->cqe_req = 1;
+ }
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
@@ -245,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* no break is intentional here */
case IB_QPT_RC:
/* TODO: atomic not implemented */
- wqe_p->u.nud.remote_virtual_adress =
+ wqe_p->u.nud.remote_virtual_address =
send_wr->wr.rdma.remote_addr;
wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
@@ -376,33 +399,36 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
- struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
int ret;
+ u32 sq_map_idx;
u64 start_offset = my_qp->ipz_squeue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
}
+
+ /*
+ * Get the index of the WQE in the send queue. The same index is used
+ * for writing into the sq_map.
+ */
+ sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
+
/* write a SEND WQE into the QUEUE */
- ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
+ ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
/*
* if something failed,
* reset the free entry pointer to the start value
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@@ -416,7 +442,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@@ -425,7 +450,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* LOCK the QUEUE */
@@ -444,24 +470,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
- post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+ post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
- for (cur_send_wr = send_wr; cur_send_wr != NULL;
- cur_send_wr = cur_send_wr->next) {
- ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ while (send_wr) {
+ ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
- /* if one or more WQEs were successful, don't fail */
- if (wqe_cnt)
- ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
- } /* eof for cur_send_wr */
+ send_wr = send_wr->next;
+ }
post_send_exit0:
iosync(); /* serialize GAL register access */
@@ -471,6 +494,10 @@ post_send_exit0:
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+ if (ret)
+ *bad_send_wr = send_wr;
return ret;
}
@@ -479,56 +506,64 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
- struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
+ u32 rq_map_idx;
unsigned long flags;
+ struct ehca_qmap_entry *qmap_entry;
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
- /* loop processes list of send reqs */
- for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
- cur_recv_wr = cur_recv_wr->next) {
+ /* loop processes list of recv reqs */
+ while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_recv_wr)
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(dev, "Too many posted WQEs "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+ /*
+ * Get the index of the WQE in the recv queue. The same index
+ * is used for writing into the rq_map.
+ */
+ rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
+ rq_map_idx);
/*
* if something failed,
* reset the free entry pointer to the start value
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(dev, "Could not write WQE "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+
+ qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+ qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 1;
+
wqe_cnt++;
- } /* eof for cur_recv_wr */
+ recv_wr = recv_wr->next;
+ } /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@@ -537,6 +572,11 @@ post_recv_exit0:
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+ if (ret)
+ *bad_recv_wr = recv_wr;
+
return ret;
}
@@ -550,6 +590,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
+ *bad_recv_wr = recv_wr;
return -EINVAL;
}
@@ -583,13 +624,15 @@ static const u8 ib_wc_opcode[255] = {
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
- int ret = 0;
+ int ret = 0, qmap_tail_idx;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
struct ehca_qp *my_qp;
+ struct ehca_qmap_entry *qmap_entry;
+ struct ehca_queue_map *qmap;
int cqe_count = 0, is_error;
-poll_cq_one_read_cqe:
+repoll:
cqe = (struct ehca_cqe *)
ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
if (!cqe) {
@@ -617,7 +660,7 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
my_cq->cq_number, cqe->local_qp_number);
/* ignore this purged cqe */
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
spin_lock_irqsave(&qp->spinlock_s, flags);
purgeflag = qp->sqerr_purgeflag;
@@ -636,7 +679,7 @@ poll_cq_one_read_cqe:
* that caused sqe and turn off purge flag
*/
qp->sqerr_purgeflag = 0;
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
}
@@ -654,8 +697,62 @@ poll_cq_one_read_cqe:
my_cq, my_cq->cq_number);
}
- /* we got a completion! */
- wc->wr_id = cqe->work_request_id;
+ read_lock(&ehca_qp_idr_lock);
+ my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+ read_unlock(&ehca_qp_idr_lock);
+ if (!my_qp)
+ goto repoll;
+ wc->qp = &my_qp->ib_qp;
+
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
+ /* We got a receive completion. */
+ qmap = &my_qp->rq_map;
+
+ /* advance the tail pointer */
+ qmap->tail = qmap_tail_idx;
+
+ if (is_error) {
+ /*
+ * set left_to_poll to 0 because in error state, we will not
+ * get any additional CQEs
+ */
+ my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+ my_qp->sq_map.entries);
+ my_qp->sq_map.left_to_poll = 0;
+ ehca_add_to_err_list(my_qp, 1);
+
+ my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+ my_qp->rq_map.entries);
+ my_qp->rq_map.left_to_poll = 0;
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+
+ qmap_entry = &qmap->map[qmap_tail_idx];
+ if (qmap_entry->reported) {
+ ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+ my_qp->real_qp_num);
+ /* found a double cqe, discard it and read next one */
+ goto repoll;
+ }
+
+ wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+ qmap_entry->reported = 1;
+
+ /* if left_to_poll is decremented to 0, add the QP to the error list */
+ if (qmap->left_to_poll > 0) {
+ qmap->left_to_poll--;
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ ehca_add_to_err_list(my_qp, 1);
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+ }
/* eval ib_wc_opcode */
wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -667,7 +764,7 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
my_cq, my_cq->cq_number);
/* update also queue adder to throw away this entry!!! */
- goto poll_cq_one_exit0;
+ goto repoll;
}
/* eval ib_wc_status */
@@ -678,17 +775,16 @@ poll_cq_one_read_cqe:
} else
wc->status = IB_WC_SUCCESS;
- read_lock(&ehca_qp_idr_lock);
- my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
- wc->qp = &my_qp->ib_qp;
- read_unlock(&ehca_qp_idr_lock);
-
wc->byte_len = cqe->nr_bytes_transferred;
wc->pkey_index = cqe->pkey_index;
wc->slid = cqe->rlid;
wc->dlid_path_bits = cqe->dlid;
wc->src_qp = cqe->remote_qp_number;
- wc->wc_flags = cqe->w_completion_flags;
+ /*
+ * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+ * SW defines those in bits 1 and 0, so we can just shift and mask.
+ */
+ wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
@@ -699,13 +795,89 @@ poll_cq_one_exit0:
return ret;
}
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+ struct ib_wc *wc, int num_entries,
+ struct ipz_queue *ipz_queue, int on_sq)
+{
+ int nr = 0;
+ struct ehca_wqe *wqe;
+ u64 offset;
+ struct ehca_queue_map *qmap;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (on_sq)
+ qmap = &my_qp->sq_map;
+ else
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+ /* generate flush CQE */
+
+ memset(wc, 0, sizeof(*wc));
+
+ offset = qmap->next_wqe_idx * ipz_queue->qe_size;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+ if (!wqe) {
+ ehca_err(cq->device, "Invalid wqe offset=%#llx on "
+ "qp_num=%#x", offset, my_qp->real_qp_num);
+ return nr;
+ }
+
+ wc->wr_id = replace_wr_id(wqe->work_request_id,
+ qmap_entry->app_wr_id);
+
+ if (on_sq) {
+ switch (wqe->optype) {
+ case WQE_OPTYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case WQE_OPTYPE_RDMAWRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case WQE_OPTYPE_RDMAREAD:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ default:
+ ehca_err(cq->device, "Invalid optype=%x",
+ wqe->optype);
+ return nr;
+ }
+ } else
+ wc->opcode = IB_WC_RECV;
+
+ if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+ wc->ex.imm_data = wqe->immediate_data;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ }
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+
+ wc->qp = &my_qp->ib_qp;
+
+ /* mark as reported and advance next_wqe pointer */
+ qmap_entry->reported = 1;
+ qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+ qmap->entries);
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ wc++; nr++;
+ }
+
+ return nr;
+
+}
+
int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int nr;
+ struct ehca_qp *err_qp;
struct ib_wc *current_wc = wc;
int ret = 0;
unsigned long flags;
+ int entries_left = num_entries;
if (num_entries < 1) {
ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -715,15 +887,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
}
spin_lock_irqsave(&my_cq->spinlock, flags);
- for (nr = 0; nr < num_entries; nr++) {
+
+ /* generate flush cqes for send queues */
+ list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_squeue, 1);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ /* generate flush cqes for receive queues */
+ list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_rqueue, 0);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ for (nr = 0; nr < entries_left; nr++) {
ret = ehca_poll_cq_one(cq, current_wc);
if (ret)
break;
current_wc++;
} /* eof for nr */
+ entries_left -= nr;
+
spin_unlock_irqrestore(&my_cq->spinlock, flags);
if (ret == -EAGAIN || !ret)
- ret = nr;
+ ret = num_entries - entries_left;
poll_cq_exit0:
return ret;