aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/amso1100/c2.h2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_cq.c16
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c45
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c19
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c42
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c69
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c13
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c101
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c76
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c17
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c14
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c15
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.h8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c68
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mmap.c64
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c52
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c55
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c55
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c16
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h29
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c16
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig9
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile3
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c100
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c525
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c216
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c339
-rw-r--r--drivers/infiniband/hw/mlx4/main.c652
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h285
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c184
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c1386
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c340
-rw-r--r--drivers/infiniband/hw/mlx4/user.h95
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c40
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c179
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c1
60 files changed, 4834 insertions, 426 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
index 04a9db5de88..fa58200217a 100644
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -519,7 +519,7 @@ extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
/* CM */
extern int c2_llp_connect(struct iw_cm_id *cm_id,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 5175c99ee58..d2b3366786d 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -217,17 +217,19 @@ int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
return npolled;
}
-int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct c2_mq_shared __iomem *shared;
struct c2_cq *cq;
+ unsigned long flags;
+ int ret = 0;
cq = to_c2cq(ibcq);
shared = cq->mq.peer;
- if (notify == IB_CQ_NEXT_COMP)
+ if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
- else if (notify == IB_CQ_SOLICITED)
+ else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
else
return -EINVAL;
@@ -241,7 +243,13 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
*/
readb(&shared->armed);
- return 0;
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ spin_lock_irqsave(&cq->lock, flags);
+ ret = !c2_mq_empty(&cq->mq);
+ spin_unlock_irqrestore(&cq->lock, flags);
+ }
+
+ return ret;
}
static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 607c09bf764..997cf153076 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
#include <asm/byteorder.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "c2.h"
#include "c2_provider.h"
@@ -290,7 +291,7 @@ static int c2_destroy_qp(struct ib_qp *ib_qp)
return 0;
}
-static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
}
mr->pd = to_c2pd(ib_pd);
+ mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
}
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
u64 *pages;
u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct c2_mr *c2mr;
pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
- shift = ffs(region->page_size) - 1;
c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
if (!c2mr)
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(c2mr->umem)) {
+ err = PTR_ERR(c2mr->umem);
+ kfree(c2mr);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(c2mr->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
}
i = 0;
- list_for_each_entry(chunk, &region->chunk_list, list) {
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(&chunk->page_list[j]) +
- (region->page_size * k);
+ (c2mr->umem->page_size * k);
}
}
}
- kva = (u64)region->virt_base;
+ kva = virt;
err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
pages,
- region->page_size,
+ c2mr->umem->page_size,
i,
- region->length,
- region->offset,
+ length,
+ c2mr->umem->offset,
&kva,
c2_convert_access(acc),
c2mr);
kfree(pages);
- if (err) {
- kfree(c2mr);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err;
return &c2mr->ibmr;
err:
+ ib_umem_release(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
if (err)
pr_debug("c2_stag_dealloc failed: %d\n", err);
- else
+ else {
+ if (mr->umem)
+ ib_umem_release(mr->umem);
kfree(mr);
+ }
return err;
}
@@ -795,6 +807,7 @@ int c2_register_device(struct c2_dev *dev)
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dma_device = &dev->pcidev->dev;
dev->ibdev.query_device = c2_query_device;
dev->ibdev.query_port = c2_query_port;
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc906223220..1076df2ee96 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
struct c2_mr {
struct ib_mr ibmr;
struct c2_pd *pd;
+ struct ib_umem *umem;
};
struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index f5e9aeec6f6..76049afc765 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -114,7 +114,10 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
return -EIO;
}
}
+
+ return 1;
}
+
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 90d7b8972cb..ff7290eacef 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -38,6 +38,7 @@
#include "firmware_exports.h"
#define T3_MAX_SGE 4
+#define T3_MAX_INLINE 64
#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3b4b0acd707..b2faff5abce 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1109,6 +1109,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
PDBG("%s ep %p\n", __FUNCTION__, ep);
+ /*
+ * We get 2 abort replies from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+ if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) {
+ ep->flags |= ABORT_REQ_IN_PROGRESS;
+ return CPL_RET_BUF_DONE;
+ }
+
close_complete_upcall(ep);
state_set(&ep->com, DEAD);
release_ep_resources(ep);
@@ -1189,6 +1198,7 @@ static int listen_stop(struct iwch_listen_ep *ep)
}
req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->cpu_idx = 0;
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
skb->priority = 1;
ep->com.tdev->send(ep->com.tdev, skb);
@@ -1475,6 +1485,15 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
int ret;
int state;
+ /*
+ * We get 2 peer aborts from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+ if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) {
+ ep->flags |= PEER_ABORT_IN_PROGRESS;
+ return CPL_RET_BUF_DONE;
+ }
+
if (is_neg_adv_abort(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
ep->hwtid);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 0c6f281bd4a..21a388c313c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -143,6 +143,11 @@ enum iwch_ep_state {
DEAD,
};
+enum iwch_ep_flags {
+ PEER_ABORT_IN_PROGRESS = (1 << 0),
+ ABORT_REQ_IN_PROGRESS = (1 << 1),
+};
+
struct iwch_ep_common {
struct iw_cm_id *cm_id;
struct iwch_qp *qp;
@@ -181,6 +186,7 @@ struct iwch_ep {
u16 plen;
u32 ird;
u32 ord;
+ u32 flags;
};
static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index af28a317016..e7c2c394803 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "cxio_hal.h"
@@ -139,7 +140,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq)
return 0;
}
-static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries,
+static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *ib_context,
struct ib_udata *udata)
{
@@ -292,7 +293,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
#endif
}
-static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct iwch_dev *rhp;
struct iwch_cq *chp;
@@ -303,7 +304,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
chp = to_iwch_cq(ibcq);
rhp = chp->rhp;
- if (notify == IB_CQ_SOLICITED)
+ if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
cq_op = CQ_ARM_SE;
else
cq_op = CQ_ARM_AN;
@@ -317,9 +318,11 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
spin_unlock_irqrestore(&chp->lock, flag);
- if (err)
+ if (err < 0)
printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
chp->cq.cqid);
+ if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
+ err = 0;
return err;
}
@@ -441,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
kfree(mhp);
return 0;
@@ -575,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
}
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
@@ -589,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
struct iwch_reg_user_mr_resp uresp;
PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
- shift = ffs(region->page_size) - 1;
php = to_iwch_pd(pd);
rhp = php->rhp;
@@ -597,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(mhp->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -609,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
i = n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
- region->page_size * k);
+ mhp->umem->page_size * k);
}
}
@@ -623,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = region->virt_base;
+ mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) region->length;
+ mhp->attr.len = (u32) length;
mhp->attr.pbl_size = i;
err = iwch_register_mem(rhp, php, mhp, shift, pages);
kfree(pages);
@@ -648,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
return &mhp->ibmr;
err:
+ ib_umem_release(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
@@ -780,6 +794,9 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
if (rqsize > T3_MAX_RQ_SIZE)
return ERR_PTR(-EINVAL);
+ if (attrs->cap.max_inline_data > T3_MAX_INLINE)
+ return ERR_PTR(-EINVAL);
+
/*
* NOTE: The SQ and total WQ sizes don't need to be
* a power of two. However, all the code assumes
@@ -1107,6 +1124,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.node_type = RDMA_NODE_RNIC;
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+ dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56756b..48833f3f3bd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
struct iwch_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct iwch_dev *rhp;
u64 kva;
struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 0a472c9b44d..714dddbc9a9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -471,43 +471,62 @@ int iwch_bind_mw(struct ib_qp *qp,
return err;
}
-static void build_term_codes(int t3err, u8 *layer_type, u8 *ecode, int tagged)
+static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
+ u8 *layer_type, u8 *ecode)
{
- switch (t3err) {
+ int status = TPT_ERR_INTERNAL_ERR;
+ int tagged = 0;
+ int opcode = -1;
+ int rqtype = 0;
+ int send_inv = 0;
+
+ if (rsp_msg) {
+ status = CQE_STATUS(rsp_msg->cqe);
+ opcode = CQE_OPCODE(rsp_msg->cqe);
+ rqtype = RQ_TYPE(rsp_msg->cqe);
+ send_inv = (opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV);
+ tagged = (opcode == T3_RDMA_WRITE) ||
+ (rqtype && (opcode == T3_READ_RESP));
+ }
+
+ switch (status) {
case TPT_ERR_STAG:
- if (tagged == 1) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_INV_STAG;
- } else if (tagged == 2) {
+ if (send_inv) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ } else {
*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
*ecode = RDMAP_INV_STAG;
}
break;
case TPT_ERR_PDID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ if ((opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV))
+ *ecode = RDMAP_CANT_INV_STAG;
+ else
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
case TPT_ERR_QPID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
case TPT_ERR_ACCESS:
- if (tagged == 1) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_STAG_NOT_ASSOC;
- } else if (tagged == 2) {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_STAG_NOT_ASSOC;
- }
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_ACC_VIOL;
break;
case TPT_ERR_WRAP:
*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
*ecode = RDMAP_TO_WRAP;
break;
case TPT_ERR_BOUND:
- if (tagged == 1) {
+ if (tagged) {
*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
*ecode = DDPT_BASE_BOUNDS;
- } else if (tagged == 2) {
+ } else {
*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
*ecode = RDMAP_BASE_BOUNDS;
- } else {
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_MSG_TOOBIG;
}
break;
case TPT_ERR_INVALIDATE_SHARED_MR:
@@ -591,8 +610,6 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
union t3_wr *wqe;
struct terminate_message *term;
- int status;
- int tagged = 0;
struct sk_buff *skb;
PDBG("%s %d\n", __FUNCTION__, __LINE__);
@@ -610,17 +627,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
/* immediate data starts here. */
term = (struct terminate_message *)wqe->send.sgl;
- if (rsp_msg) {
- status = CQE_STATUS(rsp_msg->cqe);
- if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)
- tagged = 1;
- if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) ||
- (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP))
- tagged = 2;
- } else {
- status = TPT_ERR_INTERNAL_ERR;
- }
- build_term_codes(status, &term->layer_etype, &term->ecode, tagged);
+ build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
build_fw_riwrh((void *)wqe, T3_WR_SEND,
T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
qhp->ep->hwtid, 5);
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fbafa0..1d286d3cc2d 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
struct ib_mr ib_mr; /* must always be first in ehca_mr */
struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
} ib;
+ struct ib_umem *umem;
spinlock_t mrlock;
enum ehca_mr_flag flags;
@@ -276,6 +277,7 @@ void ehca_cleanup_mrmw_cache(void);
extern spinlock_t ehca_qp_idr_lock;
extern spinlock_t ehca_cq_idr_lock;
+extern spinlock_t hcall_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index e2cdc1a16fe..67f0670fe3b 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -113,7 +113,7 @@ struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
return ret;
}
-struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index f284be1c916..100329ba334 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -517,12 +517,11 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
else {
struct ehca_cq *cq = eq->eqe_cache[i].cq;
comp_event_callback(cq);
- spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ spin_lock(&ehca_cq_idr_lock);
cq->nr_events--;
if (!cq->nr_events)
wake_up(&cq->wait_completion);
- spin_unlock_irqrestore(&ehca_cq_idr_lock,
- flags);
+ spin_unlock(&ehca_cq_idr_lock);
}
} else {
ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -711,6 +710,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
kthread_stop(task);
}
+#ifdef CONFIG_HOTPLUG_CPU
static void take_over_work(struct ehca_comp_pool *pool,
int cpu)
{
@@ -735,7 +735,6 @@ static void take_over_work(struct ehca_comp_pool *pool,
}
-#ifdef CONFIG_HOTPLUG_CPU
static int comp_pool_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -745,6 +744,7 @@ static int comp_pool_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
if(!create_comp_task(pool, cpu)) {
ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
@@ -752,24 +752,29 @@ static int comp_pool_callback(struct notifier_block *nfb,
}
break;
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, any_online_cpu(cpu_online_map));
destroy_comp_task(pool, cpu);
break;
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, cpu);
wake_up_process(cct->task);
break;
case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
break;
case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
destroy_comp_task(pool, cpu);
take_over_work(pool, cpu);
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index 95fd59fb452..37e7fe0908c 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
int num_phys_buf,
int mr_access_flags, u64 *iova_start);
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
int mr_access_flags, struct ib_udata *udata);
int ehca_rereg_phys_mr(struct ib_mr *mr,
@@ -123,7 +122,7 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
-struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata);
@@ -135,7 +134,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
struct ib_qp *ehca_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 3b23d677cb8..c3f99f33b49 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0022");
+MODULE_VERSION("SVNEHCA_0023");
int ehca_open_aqp1 = 0;
int ehca_debug_level = 0;
@@ -62,7 +62,7 @@ int ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_poll_all_eqs = 1;
int ehca_static_rate = -1;
-int ehca_scaling_code = 1;
+int ehca_scaling_code = 0;
module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
module_param_named(debug_level, ehca_debug_level, int, 0);
@@ -98,6 +98,7 @@ MODULE_PARM_DESC(scaling_code,
spinlock_t ehca_qp_idr_lock;
spinlock_t ehca_cq_idr_lock;
+spinlock_t hcall_lock;
DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
@@ -313,6 +314,7 @@ int ehca_init_device(struct ehca_shca *shca)
shca->ib_device.node_type = RDMA_NODE_IB_CA;
shca->ib_device.phys_port_cnt = shca->num_ports;
+ shca->ib_device.num_comp_vectors = 1;
shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev;
shca->ib_device.query_device = ehca_query_device;
shca->ib_device.query_port = ehca_query_port;
@@ -375,7 +377,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
return -EPERM;
}
- ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
+ ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10, 0);
if (IS_ERR(ibcq)) {
ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
return PTR_ERR(ibcq);
@@ -452,15 +454,14 @@ static ssize_t ehca_store_debug_level(struct device_driver *ddp,
DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
ehca_show_debug_level, ehca_store_debug_level);
-void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
-{
- driver_create_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute *ehca_drv_attrs[] = {
+ &driver_attr_debug_level.attr,
+ NULL
+};
-void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
-{
- driver_remove_file(&drv->driver, &driver_attr_debug_level);
-}
+static struct attribute_group ehca_drv_attr_grp = {
+ .attrs = ehca_drv_attrs
+};
#define EHCA_RESOURCE_ATTR(name) \
static ssize_t ehca_show_##name(struct device *dev, \
@@ -522,54 +523,38 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+static struct attribute *ehca_dev_attrs[] = {
+ &dev_attr_adapter_handle.attr,
+ &dev_attr_num_ports.attr,
+ &dev_attr_hw_ver.attr,
+ &dev_attr_max_eq.attr,
+ &dev_attr_cur_eq.attr,
+ &dev_attr_max_cq.attr,
+ &dev_attr_cur_cq.attr,
+ &dev_attr_max_qp.attr,
+ &dev_attr_cur_qp.attr,
+ &dev_attr_max_mr.attr,
+ &dev_attr_cur_mr.attr,
+ &dev_attr_max_mw.attr,
+ &dev_attr_cur_mw.attr,
+ &dev_attr_max_pd.attr,
+ &dev_attr_max_ah.attr,
+ NULL
+};
-void ehca_create_device_sysfs(struct ibmebus_dev *dev)
-{
- device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
- device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
- device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
- device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
- device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
-
-void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
-{
- device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
- device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
- device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
- device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
- device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
-}
+static struct attribute_group ehca_dev_attr_grp = {
+ .attrs = ehca_dev_attrs
+};
static int __devinit ehca_probe(struct ibmebus_dev *dev,
const struct of_device_id *id)
{
struct ehca_shca *shca;
- u64 *handle;
+ const u64 *handle;
struct ib_pd *ibpd;
int ret;
- handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
+ handle = of_get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
if (!handle) {
ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
dev->ofdev.node->full_name);
@@ -667,7 +652,10 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
}
}
- ehca_create_device_sysfs(dev);
+ ret = sysfs_create_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
+ if (ret) /* only complain; we can live without attributes */
+ ehca_err(&shca->ib_device,
+ "Cannot create device attributes ret=%d", ret);
spin_lock(&shca_list_lock);
list_add(&shca->shca_list, &shca_list);
@@ -719,7 +707,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
struct ehca_shca *shca = dev->ofdev.dev.driver_data;
int ret;
- ehca_remove_device_sysfs(dev);
+ sysfs_remove_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp);
if (ehca_open_aqp1 == 1) {
int i;
@@ -811,11 +799,12 @@ int __init ehca_module_init(void)
int ret;
printk(KERN_INFO "eHCA Infiniband Device Driver "
- "(Rel.: SVNEHCA_0022)\n");
+ "(Rel.: SVNEHCA_0023)\n");
idr_init(&ehca_qp_idr);
idr_init(&ehca_cq_idr);
spin_lock_init(&ehca_qp_idr_lock);
spin_lock_init(&ehca_cq_idr_lock);
+ spin_lock_init(&hcall_lock);
INIT_LIST_HEAD(&shca_list);
spin_lock_init(&shca_list_lock);
@@ -837,7 +826,9 @@ int __init ehca_module_init(void)
goto module_init2;
}
- ehca_create_driver_sysfs(&ehca_driver);
+ ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
+ if (ret) /* only complain; we can live without attributes */
+ ehca_gen_err("Cannot create driver attributes ret=%d", ret);
if (ehca_poll_all_eqs != 1) {
ehca_gen_err("WARNING!!!");
@@ -864,7 +855,7 @@ void __exit ehca_module_exit(void)
if (ehca_poll_all_eqs == 1)
del_timer_sync(&poll_eqs_timer);
- ehca_remove_driver_sysfs(&ehca_driver);
+ sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp);
ibmebus_unregister_driver(&ehca_driver);
ehca_destroy_slab_caches();
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab563633..add79bd44e3 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <rdma/ib_umem.h>
+
#include <asm/current.h>
#include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
- struct ib_umem *region,
- int mr_access_flags,
- struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
+ int mr_access_flags, struct ib_udata *udata)
{
struct ib_mr *ib_mr;
struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ehca_gen_err("bad pd=%p", pd);
return ERR_PTR(-EFAULT);
}
- if (!region) {
- ehca_err(pd->device, "bad input values: region=%p", region);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
+
if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
!(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
- if (region->page_size != PAGE_SIZE) {
- ehca_err(pd->device, "page size not supported, "
- "region->page_size=%x", region->page_size);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_user_mr_exit0;
- }
- if ((region->length == 0) ||
- ((region->virt_base + region->length) < region->virt_base)) {
+ if (length == 0 || virt + length < virt) {
ehca_err(pd->device, "bad input values: length=%lx "
- "virt_base=%lx", region->length, region->virt_base);
+ "virt_base=%lx", length, virt);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
goto reg_user_mr_exit0;
}
+ e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags);
+ if (IS_ERR(e_mr->umem)) {
+ ib_mr = (void *) e_mr->umem;
+ goto reg_user_mr_exit1;
+ }
+
+ if (e_mr->umem->page_size != PAGE_SIZE) {
+ ehca_err(pd->device, "page size not supported, "
+ "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit2;
+ }
+
/* determine number of MR pages */
- num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
- PAGE_SIZE - 1) / PAGE_SIZE);
- num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
- EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+ num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
+ PAGE_SIZE);
+ num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
+ EHCA_PAGESIZE);
/* register MR on HCA */
pginfo.type = EHCA_MR_PGI_USER;
pginfo.num_pages = num_pages_mr;
pginfo.num_4k = num_pages_4k;
- pginfo.region = region;
- pginfo.next_4k = region->offset / EHCA_PAGESIZE;
+ pginfo.region = e_mr->umem;
+ pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
- (&region->chunk_list),
+ (&e_mr->umem->chunk_list),
list);
- ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
- region->length, mr_access_flags, e_pd, &pginfo,
- &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+ ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
+ &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
if (ret) {
ib_mr = ERR_PTR(ret);
- goto reg_user_mr_exit1;
+ goto reg_user_mr_exit2;
}
/* successful registration of all pages */
return &e_mr->ib.ib_mr;
+reg_user_mr_exit2:
+ ib_umem_release(e_mr->umem);
reg_user_mr_exit1:
ehca_mr_delete(e_mr);
reg_user_mr_exit0:
if (IS_ERR(ib_mr))
- ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+ ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
" udata=%p",
- PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+ PTR_ERR(ib_mr), pd, mr_access_flags, udata);
return ib_mr;
} /* end ehca_reg_user_mr() */
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
goto dereg_mr_exit0;
}
+ if (e_mr->umem)
+ ib_umem_release(e_mr->umem);
+
/* successful deregistration */
ehca_mr_delete(e_mr);
@@ -2043,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
switch (hipz_rc) {
case H_SUCCESS: /* successful completion */
return 0;
- case H_ADAPTER_PARM: /* invalid adapter handle */
- case H_RT_PARM: /* invalid resource type */
case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
- case H_MLENGTH_PARM: /* invalid memory length */
- case H_MEM_ACCESS_PARM: /* invalid access controls */
case H_CONSTRAINED: /* resource constraint */
- return -EINVAL;
+ case H_NO_MEM:
+ return -ENOMEM;
case H_BUSY: /* long busy */
return -EBUSY;
default:
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index df0516f2437..b5bc787c77b 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -523,6 +523,8 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
goto create_qp_exit1;
}
+ my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+
switch (init_attr->qp_type) {
case IB_QPT_RC:
if (isdaqp == 0) {
@@ -568,7 +570,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
parms.act_nr_send_sges = init_attr->cap.max_send_sge;
parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
- my_qp->real_qp_num =
+ my_qp->ib_qp.qp_num =
(init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
}
@@ -595,7 +597,6 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
my_qp->ib_qp.recv_cq = init_attr->recv_cq;
my_qp->ib_qp.send_cq = init_attr->send_cq;
- my_qp->ib_qp.qp_num = my_qp->real_qp_num;
my_qp->ib_qp.qp_type = init_attr->qp_type;
my_qp->qp_type = init_attr->qp_type;
@@ -968,17 +969,21 @@ static int internal_modify_qp(struct ib_qp *ibqp,
((ehca_mult - 1) / ah_mult) : 0;
else
mqpcb->max_static_rate = 0;
-
update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
/*
+ * Always supply the GRH flag, even if it's zero, to give the
+ * hypervisor a clear "yes" or "no" instead of a "perhaps"
+ */
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+
+ /*
* only if GRH is TRUE we might consider SOURCE_GID_IDX
* and DEST_GID otherwise phype will return H_ATTR_PARM!!!
*/
if (attr->ah_attr.ah_flags == IB_AH_GRH) {
- mqpcb->send_grh_flag = 1 << 31;
- update_mask |=
- EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+ mqpcb->send_grh_flag = 1;
+
mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 08d3f892d9f..caec9dee09e 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -634,11 +634,13 @@ poll_cq_exit0:
return ret;
}
-int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ unsigned long spl_flags;
+ int ret = 0;
- switch (cq_notify) {
+ switch (notify_flags & IB_CQ_SOLICITED_MASK) {
case IB_CQ_SOLICITED:
hipz_set_cqx_n0(my_cq, 1);
break;
@@ -649,5 +651,11 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
return -EINVAL;
}
- return 0;
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+ ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
+ spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+ }
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index b564fcd3b28..5766ae3a202 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -154,7 +154,8 @@ static long ehca_plpar_hcall9(unsigned long opcode,
unsigned long arg9)
{
long ret;
- int i, sleep_msecs;
+ int i, sleep_msecs, lock_is_set = 0;
+ unsigned long flags;
ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
"arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -162,10 +163,18 @@ static long ehca_plpar_hcall9(unsigned long opcode,
arg8, arg9);
for (i = 0; i < 5; i++) {
+ if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
+ spin_lock_irqsave(&hcall_lock, flags);
+ lock_is_set = 1;
+ }
+
ret = plpar_hcall9(opcode, outs,
arg1, arg2, arg3, arg4, arg5,
arg6, arg7, arg8, arg9);
+ if (lock_is_set)
+ spin_unlock_irqrestore(&hcall_lock, flags);
+
if (H_IS_LONG_BUSY(ret)) {
sleep_msecs = get_longbusy_msecs(ret);
msleep_interruptible(sleep_msecs);
@@ -193,11 +202,11 @@ static long ehca_plpar_hcall9(unsigned long opcode,
opcode, ret, outs[0], outs[1], outs[2], outs[3],
outs[4], outs[5], outs[6], outs[7], outs[8]);
return ret;
-
}
return H_BUSY;
}
+
u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
struct ehca_pfeq *pfeq,
const u32 neq_control,
@@ -322,7 +331,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
0);
qp->ipz_qp_handle.handle = outs[0];
qp->real_qp_num = (u32)outs[1];
- parms->act_nr_send_sges =
+ parms->act_nr_send_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
parms->act_nr_recv_wqes =
(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index 8199c45768a..57f141a36bc 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -140,6 +140,14 @@ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
return cqe;
}
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+ struct ehca_cqe *cqe = ipz_qeit_get(queue);
+ u32 cqe_flags = cqe->cqe_flags;
+
+ return cqe_flags >> 7 == (queue->toggle_state & 1);
+}
+
/*
* returns and resets Queue Entry iterator
* returns address (kv) of first Queue Entry
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index ea78e6dddc9..3e9241badba 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -204,7 +204,7 @@ static void send_complete(unsigned long data)
*
* Called by ib_create_cq() in the generic verbs code.
*/
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -243,33 +243,21 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) wc;
int err;
+ u32 s = sizeof *wc + sizeof(struct ib_wc) * entries;
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
+ cq->ip = ipath_create_mmap_info(dev, s, context, wc);
+ if (!cq->ip) {
+ ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wc;
+ err = ib_copy_to_udata(udata, &cq->ip->offset,
+ sizeof(cq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
}
- cq->ip = ip;
- ip->context = context;
- ip->obj = wc;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(*wc) +
- sizeof(struct ib_wc) * entries);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
} else
cq->ip = NULL;
@@ -277,12 +265,18 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
spin_unlock(&dev->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
- goto bail_wc;
+ goto bail_ip;
}
dev->n_cqs_allocated++;
spin_unlock(&dev->n_cqs_lock);
+ if (cq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
@@ -301,12 +295,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
goto done;
+bail_ip:
+ kfree(cq->ip);
bail_wc:
vfree(wc);
-
bail_cq:
kfree(cq);
-
done:
return ret;
}
@@ -340,17 +334,18 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
/**
* ipath_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
- * @notify: the type of notification to request
+ * @notify_flags: the type of notification to request
*
* Returns 0 for success.
*
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
*/
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct ipath_cq *cq = to_icq(ibcq);
unsigned long flags;
+ int ret = 0;
spin_lock_irqsave(&cq->lock, flags);
/*
@@ -358,9 +353,15 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
- cq->notify = notify;
+ cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->queue->head != cq->queue->tail)
+ ret = 1;
+
spin_unlock_irqrestore(&cq->lock, flags);
- return 0;
+
+ return ret;
}
/**
@@ -443,13 +444,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
if (cq->ip) {
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_mmap_info *ip = cq->ip;
+ u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe;
- ip->obj = wc;
- ip->size = PAGE_ALIGN(sizeof(*wc) +
- sizeof(struct ib_wc) * cqe);
+ ipath_update_mmap_info(dev, ip, s, wc);
spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index ed55979bfd3..ebd5c7bd2cd 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -38,7 +38,6 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
-#include <linux/pci.h>
#include "ipath_kernel.h"
@@ -524,7 +523,7 @@ static int ipathfs_fill_super(struct super_block *sb, void *data,
int ret;
static struct tree_descr files[] = {
- [1] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
+ [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
{""},
};
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 1b9c3085775..4e2e3dfeb2c 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -747,7 +747,6 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
- u64 val;
u32 chiprev;
/*
@@ -760,9 +759,9 @@ static int ipath_pe_intconfig(struct ipath_devdata *dd)
if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
/* Rev2+ reports extra errors via internal GPIO pins */
dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
- val |= IPATH_GPIO_ERRINTR_MASK;
- ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
}
return 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 45d033169c6..a90d3b5699c 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -1056,7 +1056,7 @@ irqreturn_t ipath_intr(int irq, void *data)
gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
chk0rcv = 1;
}
- if (unlikely(gpiostatus)) {
+ if (gpiostatus) {
/*
* Some unexpected bits remain. If they could have
* caused the interrupt, complain and clear.
@@ -1065,9 +1065,8 @@ irqreturn_t ipath_intr(int irq, void *data)
* GPIO interrupts, possibly on a "three strikes"
* basis.
*/
- u32 mask;
- mask = ipath_read_kreg32(
- dd, dd->ipath_kregs->kr_gpio_mask);
+ const u32 mask = (u32) dd->ipath_gpio_mask;
+
if (mask & gpiostatus) {
ipath_dbg("Unexpected GPIO IRQ bits %x\n",
gpiostatus & mask);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e900c2593f4..12194f3dd8c 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -397,6 +397,8 @@ struct ipath_devdata {
unsigned long ipath_pioavailshadow[8];
/* shadow of kr_gpio_out, for rmw ops */
u64 ipath_gpio_out;
+ /* shadow the gpio mask register */
+ u64 ipath_gpio_mask;
/* kr_revision shadow */
u64 ipath_revision;
/*
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index e46aa4ed2a7..05a1d2b01d9 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -37,7 +37,6 @@
*/
#include <linux/io.h>
-#include <linux/pci.h>
#include <asm/byteorder.h>
#include "ipath_kernel.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
index a82157db468..937bc3396b5 100644
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -46,6 +46,11 @@ void ipath_release_mmap_info(struct kref *ref)
{
struct ipath_mmap_info *ip =
container_of(ref, struct ipath_mmap_info, ref);
+ struct ipath_ibdev *dev = to_idev(ip->context->device);
+
+ spin_lock_irq(&dev->pending_lock);
+ list_del(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
vfree(ip->obj);
kfree(ip);
@@ -60,14 +65,12 @@ static void ipath_vma_open(struct vm_area_struct *vma)
struct ipath_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
- ip->mmap_cnt++;
}
static void ipath_vma_close(struct vm_area_struct *vma)
{
struct ipath_mmap_info *ip = vma->vm_private_data;
- ip->mmap_cnt--;
kref_put(&ip->ref, ipath_release_mmap_info);
}
@@ -87,7 +90,7 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct ipath_ibdev *dev = to_idev(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
- struct ipath_mmap_info *ip, **pp;
+ struct ipath_mmap_info *ip, *pp;
int ret = -EINVAL;
/*
@@ -96,15 +99,16 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
spin_lock_irq(&dev->pending_lock);
- for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
+ list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+ pending_mmaps) {
/* Only the creator is allowed to mmap the object */
- if (context != ip->context || (void *) offset != ip->obj)
+ if (context != ip->context || (__u64) offset != ip->offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->size)
break;
- *pp = ip->next;
+ list_del_init(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
@@ -119,3 +123,51 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
done:
return ret;
}
+
+/*
+ * Allocate information for ipath_mmap
+ */
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj) {
+ struct ipath_mmap_info *ip;
+
+ ip = kmalloc(sizeof *ip, GFP_KERNEL);
+ if (!ip)
+ goto bail;
+
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ INIT_LIST_HEAD(&ip->pending_mmaps);
+ ip->size = size;
+ ip->context = context;
+ ip->obj = obj;
+ kref_init(&ip->ref);
+
+bail:
+ return ip;
+}
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj) {
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ ip->size = size;
+ ip->obj = obj;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e70732e36..bdeef8d4f27 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
+ mr->umem = NULL;
m = 0;
n = 0;
@@ -170,46 +172,56 @@ bail:
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
{
struct ipath_mr *mr;
+ struct ib_umem *umem;
struct ib_umem_chunk *chunk;
int n, m, i;
struct ib_mr *ret;
- if (region->length == 0) {
+ if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
+ umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+ if (IS_ERR(umem))
+ return (void *) umem;
+
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
+ ib_umem_release(umem);
goto bail;
}
mr->mr.pd = pd;
- mr->mr.user_base = region->user_base;
- mr->mr.iova = region->virt_base;
- mr->mr.length = region->length;
- mr->mr.offset = region->offset;
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = umem->offset;
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
+ mr->umem = umem;
m = 0;
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list) {
+ list_for_each_entry(chunk, &umem->chunk_list, list) {
for (i = 0; i < chunk->nents; i++) {
void *vaddr;
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = region->page_size;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
i--;
kfree(mr->mr.map[i]);
}
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
kfree(mr);
return 0;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 16db9ac0b40..bfef08ecd34 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -844,34 +844,36 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) qp->r_rq.wq;
int err;
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
- goto bail_rwq;
- }
+ if (!qp->r_rq.wq) {
+ __u64 offset = 0;
- if (qp->r_rq.wq) {
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
+ err = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rwq;
+ }
+ } else {
+ u32 s = sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz;
+
+ qp->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ qp->r_rq.wq);
+ if (!qp->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_rwq;
}
- qp->ip = ip;
- ip->context = ibpd->uobject->context;
- ip->obj = qp->r_rq.wq;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- qp->r_rq.size * sz);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
+
+ err = ib_copy_to_udata(udata, &(qp->ip->offset),
+ sizeof(qp->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
}
}
@@ -885,6 +887,12 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
dev->n_qps_allocated++;
spin_unlock(&dev->n_qps_lock);
+ if (qp->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
ret = &qp->ibqp;
goto bail;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index b4b88d0b53f..1915771fd03 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -98,13 +98,21 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
case OP(RDMA_READ_RESPONSE_LAST):
case OP(RDMA_READ_RESPONSE_ONLY):
case OP(ATOMIC_ACKNOWLEDGE):
- qp->s_ack_state = OP(ACKNOWLEDGE);
+ /*
+ * We can increment the tail pointer now that the last
+ * response has been sent instead of only being
+ * constructed.
+ */
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
/* FALLTHROUGH */
+ case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
if (qp->s_flags & IPATH_S_ACK_PENDING)
goto normal;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
goto bail;
}
@@ -117,12 +125,8 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
- } else {
+ } else
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
- if (++qp->s_tail_ack_queue >
- IPATH_MAX_RDMA_ATOMIC)
- qp->s_tail_ack_queue = 0;
- }
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
@@ -139,8 +143,6 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
cpu_to_be32(e->atomic_data);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn;
- if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
- qp->s_tail_ack_queue = 0;
}
bth0 = qp->s_ack_state << 24;
break;
@@ -156,8 +158,6 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
ohdr->u.aeth = ipath_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
- if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
- qp->s_tail_ack_queue = 0;
}
bth0 = qp->s_ack_state << 24;
bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -171,7 +171,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
* the ACK before setting s_ack_state to ACKNOWLEDGE
* (see above).
*/
- qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~IPATH_S_ACK_PENDING;
qp->s_cur_sge = NULL;
if (qp->s_nak_state)
@@ -223,23 +223,18 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* Sending responses has higher priority over sending requests. */
if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
(qp->s_flags & IPATH_S_ACK_PENDING) ||
- qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+ qp->s_ack_state != OP(ACKNOWLEDGE)) &&
ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
goto done;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
- qp->s_rnr_timeout)
+ qp->s_rnr_timeout || qp->s_wait_credit)
goto bail;
/* Limit the number of packets sent without an ACK. */
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
qp->s_wait_credit = 1;
dev->n_rc_stalls++;
- spin_lock(&dev->pending_lock);
- if (list_empty(&qp->timerwait))
- list_add_tail(&qp->timerwait,
- &dev->pending[dev->pending_index]);
- spin_unlock(&dev->pending_lock);
goto bail;
}
@@ -587,9 +582,12 @@ static void send_rc_ack(struct ipath_qp *qp)
u32 hwords;
struct ipath_ib_header hdr;
struct ipath_other_headers *ohdr;
+ unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
- if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+ if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != OP(ACKNOWLEDGE))
goto queue_ack;
/* Construct the header. */
@@ -640,11 +638,11 @@ static void send_rc_ack(struct ipath_qp *qp)
dev->n_rc_qacks++;
queue_ack:
- spin_lock_irq(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
qp->s_flags |= IPATH_S_ACK_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
- spin_unlock_irq(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
/* Call ipath_do_rc_send() in another thread. */
tasklet_hi_schedule(&qp->s_task);
@@ -1261,6 +1259,7 @@ ack_err:
wc.dlid_path_bits = 0;
wc.port_num = 0;
ipath_sqerror_qp(qp, &wc);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
return;
}
@@ -1294,6 +1293,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
struct ipath_ack_entry *e;
u8 i, prev;
int old_req;
+ unsigned long flags;
if (diff > 0) {
/*
@@ -1327,7 +1327,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
psn &= IPATH_PSN_MASK;
e = NULL;
old_req = 1;
- spin_lock_irq(&qp->s_lock);
+ spin_lock_irqsave(&qp->s_lock, flags);
for (i = qp->r_head_ack_queue; ; i = prev) {
if (i == qp->s_tail_ack_queue)
old_req = 0;
@@ -1425,7 +1425,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* after all the previous RDMA reads and atomics.
*/
if (i == qp->r_head_ack_queue) {
- spin_unlock_irq(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
qp->r_nak_state = 0;
qp->r_ack_psn = qp->r_psn - 1;
goto send_ack;
@@ -1439,11 +1439,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
break;
}
qp->r_nak_state = 0;
- spin_unlock_irq(&qp->s_lock);
tasklet_hi_schedule(&qp->s_task);
unlock_done:
- spin_unlock_irq(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
done:
return 1;
@@ -1453,10 +1452,12 @@ send_ack:
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
- spin_lock_irq(&qp->s_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
qp->state = IB_QPS_ERR;
ipath_error_qp(qp, err);
- spin_unlock_irq(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
}
/**
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 94033503400..03acae66ba8 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -139,33 +139,24 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
- struct ipath_mmap_info *ip;
- __u64 offset = (__u64) srq->rq.wq;
int err;
+ u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
- err = ib_copy_to_udata(udata, &offset, sizeof(offset));
- if (err) {
- ret = ERR_PTR(err);
+ srq->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ srq->rq.wq);
+ if (!srq->ip) {
+ ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
- /* Allocate info for ipath_mmap(). */
- ip = kmalloc(sizeof(*ip), GFP_KERNEL);
- if (!ip) {
- ret = ERR_PTR(-ENOMEM);
- goto bail_wq;
+ err = ib_copy_to_udata(udata, &srq->ip->offset,
+ sizeof(srq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
}
- srq->ip = ip;
- ip->context = ibpd->uobject->context;
- ip->obj = srq->rq.wq;
- kref_init(&ip->ref);
- ip->mmap_cnt = 0;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- srq->rq.size * sz);
- spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
- spin_unlock_irq(&dev->pending_lock);
} else
srq->ip = NULL;
@@ -181,21 +172,27 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
spin_unlock(&dev->n_srqs_lock);
ret = ERR_PTR(-ENOMEM);
- goto bail_wq;
+ goto bail_ip;
}
dev->n_srqs_allocated++;
spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
ret = &srq->ibsrq;
goto done;
+bail_ip:
+ kfree(srq->ip);
bail_wq:
vfree(srq->rq.wq);
-
bail_srq:
kfree(srq);
-
done:
return ret;
}
@@ -312,13 +309,13 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (srq->ip) {
struct ipath_mmap_info *ip = srq->ip;
struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+ u32 s = sizeof(struct ipath_rwq) + size * sz;
- ip->obj = wq;
- ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
- size * sz);
+ ipath_update_mmap_info(dev, ip, s, wq);
spin_lock_irq(&dev->pending_lock);
- ip->next = dev->pending_mmaps;
- dev->pending_mmaps = ip;
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps,
+ &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 9307f7187ca..d8b5e4cefe2 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -31,8 +31,6 @@
* SOFTWARE.
*/
-#include <linux/pci.h>
-
#include "ipath_kernel.h"
struct infinipath_stats ipath_stats;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index ffa6318ad0c..4dc398d5e01 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -32,7 +32,6 @@
*/
#include <linux/ctype.h>
-#include <linux/pci.h>
#include "ipath_kernel.h"
#include "ipath_common.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 18c6df2052c..bb70845279b 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1387,13 +1387,12 @@ static int enable_timer(struct ipath_devdata *dd)
* processing.
*/
if (dd->ipath_flags & IPATH_GPIO_INTR) {
- u64 val;
ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
0x2074076542310ULL);
/* Enable GPIO bit 2 interrupt */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
- val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
- ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
}
init_timer(&dd->verbs_timer);
@@ -1412,8 +1411,9 @@ static int disable_timer(struct ipath_devdata *dd)
u64 val;
/* Disable GPIO bit 2 interrupt */
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
- val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
- ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+ dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
/*
* We might want to undo changes to debugportselect,
* but how?
@@ -1476,7 +1476,10 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
ret = -ENOMEM;
goto err_lk;
}
+ INIT_LIST_HEAD(&idev->pending_mmaps);
spin_lock_init(&idev->pending_lock);
+ idev->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&idev->mmap_offset_lock);
INIT_LIST_HEAD(&idev->pending[0]);
INIT_LIST_HEAD(&idev->pending[1]);
INIT_LIST_HEAD(&idev->pending[2]);
@@ -1558,6 +1561,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
+ dev->num_comp_vectors = 1;
dev->dma_device = &dd->pcidev->dev;
dev->query_device = ipath_query_device;
dev->modify_device = ipath_modify_device;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7c4929f1cb5..088b837ebea 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -173,12 +173,12 @@ struct ipath_ah {
* this as its vm_private_data.
*/
struct ipath_mmap_info {
- struct ipath_mmap_info *next;
+ struct list_head pending_mmaps;
struct ib_ucontext *context;
void *obj;
+ __u64 offset;
struct kref ref;
unsigned size;
- unsigned mmap_cnt;
};
/*
@@ -251,6 +251,7 @@ struct ipath_sge {
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */
};
@@ -422,7 +423,7 @@ struct ipath_qp {
#define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08
-#define IPATH_PSN_CREDIT 2048
+#define IPATH_PSN_CREDIT 512
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
@@ -485,9 +486,10 @@ struct ipath_opcode_stats {
struct ipath_ibdev {
struct ib_device ibdev;
- struct list_head dev_list;
struct ipath_devdata *dd;
- struct ipath_mmap_info *pending_mmaps;
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock;
+ u32 mmap_offset;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
@@ -734,13 +736,13 @@ int ipath_destroy_srq(struct ib_srq *ibsrq);
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata);
int ipath_destroy_cq(struct ib_cq *ibcq);
-int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
@@ -750,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
@@ -768,6 +770,15 @@ int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
void ipath_release_mmap_info(struct kref *ref);
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj);
+
int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 085e28b939e..dd691cfa507 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -165,10 +165,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
{
struct rb_node **n = &mcast_tree.rb_node;
struct rb_node *pn = NULL;
- unsigned long flags;
int ret;
- spin_lock_irqsave(&mcast_lock, flags);
+ spin_lock_irq(&mcast_lock);
while (*n) {
struct ipath_mcast *tmcast;
@@ -228,7 +227,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
ret = 0;
bail:
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
return ret;
}
@@ -289,17 +288,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
- unsigned long flags;
int last = 0;
int ret;
- spin_lock_irqsave(&mcast_lock, flags);
+ spin_lock_irq(&mcast_lock);
/* Find the GID in the mcast table. */
n = mcast_tree.rb_node;
while (1) {
if (n == NULL) {
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
ret = -EINVAL;
goto bail;
}
@@ -334,7 +332,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
break;
}
- spin_unlock_irqrestore(&mcast_lock, flags);
+ spin_unlock_irq(&mcast_lock);
if (p) {
/*
@@ -348,9 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
- spin_lock(&dev->n_mcast_grps_lock);
+ spin_lock_irq(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
- spin_unlock(&dev->n_mcast_grps_lock);
+ spin_unlock_irq(&dev->n_mcast_grps_lock);
}
ret = 0;
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
new file mode 100644
index 00000000000..b8912cdb966
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -0,0 +1,9 @@
+config MLX4_INFINIBAND
+ tristate "Mellanox ConnectX HCA support"
+ depends on INFINIBAND
+ select MLX4_CORE
+ ---help---
+ This driver provides low-level InfiniBand support for
+ Mellanox ConnectX PCI Express host channel adapters (HCAs).
+ This is required to use InfiniBand protocols such as
+ IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
new file mode 100644
index 00000000000..70f09c7826d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+
+mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
new file mode 100644
index 00000000000..c75ac9463e2
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4_ib.h"
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct mlx4_ib_ah *ah;
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ memset(&ah->av, 0, sizeof ah->av);
+
+ ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.g_slid = ah_attr->src_path_bits;
+ ah->av.dlid = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+ ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.stat_rate;
+ }
+ ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah->av.g_slid |= 0x80;
+ ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
+ memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ }
+
+ return &ah->ibah;
+}
+
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct mlx4_ib_ah *ah = to_mah(ibah);
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(ah->av.dlid);
+ ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
+ ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
+ if (ah->av.stat_rate)
+ ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+
+ if (mlx4_ib_ah_grh_present(ah)) {
+ ah_attr->ah_flags = IB_AH_GRH;
+
+ ah_attr->grh.traffic_class =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
+ ah_attr->grh.flow_label =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
+ ah_attr->grh.hop_limit = ah->av.hop_limit;
+ ah_attr->grh.sgid_index = ah->av.gid_index;
+ memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+ }
+
+ return 0;
+}
+
+int mlx4_ib_destroy_ah(struct ib_ah *ah)
+{
+ kfree(to_mah(ah));
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
new file mode 100644
index 00000000000..b2a290c6703
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
+{
+ struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_cq *ibcq;
+
+ if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on CQ %06x\n", type, cq->cqn);
+ return;
+ }
+
+ ibcq = &to_mibcq(cq)->ibcq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.event = IB_EVENT_CQ_ERR;
+ event.element.cq = ibcq;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
+}
+
+static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
+{
+ int offset = n * sizeof (struct mlx4_cqe);
+
+ if (buf->buf.nbufs == 1)
+ return buf->buf.u.direct.buf + offset;
+ else
+ return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
+ (offset & (PAGE_SIZE - 1));
+}
+
+static void *get_cqe(struct mlx4_ib_cq *cq, int n)
+{
+ return get_cqe_from_buf(&cq->buf, n);
+}
+
+static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
+{
+ struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+
+ return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+}
+
+static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
+{
+ return get_sw_cqe(cq, cq->mcq.cons_index);
+}
+
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_cq *cq;
+ struct mlx4_uar *uar;
+ int buf_size;
+ int err;
+
+ if (entries < 1 || entries > dev->dev->caps.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ entries = roundup_pow_of_two(entries + 1);
+ cq->ibcq.cqe = entries - 1;
+ buf_size = entries * sizeof (struct mlx4_cqe);
+ spin_lock_init(&cq->lock);
+
+ if (context) {
+ struct mlx4_ib_create_cq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_cq;
+ }
+
+ cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ err = PTR_ERR(cq->umem);
+ goto err_cq;
+ }
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
+ ilog2(cq->umem->page_size), &cq->buf.mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+ &cq->db);
+ if (err)
+ goto err_mtt;
+
+ uar = &to_mucontext(context)->uar;
+ } else {
+ err = mlx4_ib_db_alloc(dev, &cq->db, 1);
+ if (err)
+ goto err_cq;
+
+ cq->mcq.set_ci_db = cq->db.db;
+ cq->mcq.arm_db = cq->db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+ *cq->mcq.arm_db = 0;
+
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+
+ err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
+ &cq->buf.mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
+ if (err)
+ goto err_mtt;
+
+ uar = &dev->priv_uar;
+ }
+
+ err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
+ cq->db.dma, &cq->mcq);
+ if (err)
+ goto err_dbmap;
+
+ cq->mcq.comp = mlx4_ib_cq_comp;
+ cq->mcq.event = mlx4_ib_cq_event;
+
+ if (context)
+ if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
+ err = -EFAULT;
+ goto err_dbmap;
+ }
+
+ return &cq->ibcq;
+
+err_dbmap:
+ if (context)
+ mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
+
+err_buf:
+ if (context)
+ ib_umem_release(cq->umem);
+ else
+ mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
+ &cq->buf.buf);
+
+err_db:
+ if (!context)
+ mlx4_ib_db_free(dev, &cq->db);
+
+err_cq:
+ kfree(cq);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_destroy_cq(struct ib_cq *cq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(cq->device);
+ struct mlx4_ib_cq *mcq = to_mcq(cq);
+
+ mlx4_cq_free(dev->dev, &mcq->mcq);
+ mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
+
+ if (cq->uobject) {
+ mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
+ ib_umem_release(mcq->umem);
+ } else {
+ mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
+ &mcq->buf.buf);
+ mlx4_ib_db_free(dev, &mcq->db);
+ }
+
+ kfree(mcq);
+
+ return 0;
+}
+
+static void dump_cqe(void *cqe)
+{
+ __be32 *buf = cqe;
+
+ printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
+ be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
+ be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
+}
+
+static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
+ struct ib_wc *wc)
+{
+ if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
+ printk(KERN_DEBUG "local QP operation err "
+ "(QPN %06x, WQE index %x, vendor syndrome %02x, "
+ "opcode = %02x)\n",
+ be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
+ cqe->vendor_err_syndrome,
+ cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
+ dump_cqe(cqe);
+ }
+
+ switch (cqe->syndrome) {
+ case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_MW_BIND_ERR:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
+ wc->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+ wc->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ wc->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
+ wc->status = IB_WC_REM_OP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ wc->status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+ wc->status = IB_WC_REM_ABORT_ERR;
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ wc->vendor_err = cqe->vendor_err_syndrome;
+}
+
+static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
+ struct mlx4_ib_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct mlx4_cqe *cqe;
+ struct mlx4_qp *mqp;
+ struct mlx4_ib_wq *wq;
+ struct mlx4_ib_srq *srq;
+ int is_send;
+ int is_error;
+ u16 wqe_ctr;
+
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++cq->mcq.cons_index;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
+ is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR;
+
+ if (!*cur_qp ||
+ (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
+ be32_to_cpu(cqe->my_qpn));
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
+ cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
+ return -EINVAL;
+ }
+
+ *cur_qp = to_mibqp(mqp);
+ }
+
+ wc->qp = &(*cur_qp)->ibqp;
+
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wq->tail += wqe_ctr - (u16) wq->tail;
+ wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibqp.srq) {
+ srq = to_msrq((*cur_qp)->ibqp.srq);
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx4_ib_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
+ ++wq->tail;
+ }
+
+ if (unlikely(is_error)) {
+ mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
+ return 0;
+ }
+
+ wc->status = IB_WC_SUCCESS;
+
+ if (is_send) {
+ wc->wc_flags = 0;
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_OPCODE_RDMA_WRITE_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ case MLX4_OPCODE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case MLX4_OPCODE_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ case MLX4_OPCODE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case MLX4_OPCODE_RDMA_READ:
+ wc->opcode = IB_WC_SEND;
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MLX4_OPCODE_ATOMIC_CS:
+ wc->opcode = IB_WC_COMP_SWAP;
+ wc->byte_len = 8;
+ break;
+ case MLX4_OPCODE_ATOMIC_FA:
+ wc->opcode = IB_WC_FETCH_ADD;
+ wc->byte_len = 8;
+ break;
+ case MLX4_OPCODE_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+ }
+ } else {
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->imm_data = cqe->immed_rss_invalid;
+ break;
+ case MLX4_RECV_OPCODE_SEND:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+ break;
+ case MLX4_RECV_OPCODE_SEND_IMM:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->imm_data = cqe->immed_rss_invalid;
+ break;
+ }
+
+ wc->slid = be16_to_cpu(cqe->rlid);
+ wc->sl = cqe->sl >> 4;
+ wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
+ wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
+ wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
+ IB_WC_GRH : 0;
+ wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
+ }
+
+ return 0;
+}
+
+int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
+ struct mlx4_ib_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
+ if (err)
+ break;
+ }
+
+ if (npolled)
+ mlx4_cq_set_ci(&cq->mcq);
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (err == 0 || err == -EAGAIN)
+ return npolled;
+ else
+ return err;
+}
+
+int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ mlx4_cq_arm(&to_mcq(ibcq)->mcq,
+ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
+ to_mdev(ibcq->device)->uar_map,
+ MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
+
+ return 0;
+}
+
+void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
+{
+ u32 prod_index;
+ int nfreed = 0;
+ struct mlx4_cqe *cqe;
+
+ /*
+ * First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
+ if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
+ break;
+
+ /*
+ * Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
+ if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
+ mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
+ ++nfreed;
+ } else if (nfreed)
+ memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
+ cqe, sizeof *cqe);
+ }
+
+ if (nfreed) {
+ cq->mcq.cons_index += nfreed;
+ /*
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
+ wmb();
+ mlx4_cq_set_ci(&cq->mcq);
+ }
+}
+
+void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
+{
+ spin_lock_irq(&cq->lock);
+ __mlx4_ib_cq_clean(cq, qpn, srq);
+ spin_unlock_irq(&cq->lock);
+}
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
new file mode 100644
index 00000000000..1c36087aef1
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+
+#include "mlx4_ib.h"
+
+struct mlx4_ib_db_pgdir {
+ struct list_head list;
+ DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
+ DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
+ unsigned long *bits[2];
+ __be32 *db_page;
+ dma_addr_t db_dma;
+};
+
+static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
+{
+ struct mlx4_ib_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
+ pgdir->bits[0] = pgdir->order0;
+ pgdir->bits[1] = pgdir->order1;
+ pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
+ PAGE_SIZE, &pgdir->db_dma,
+ GFP_KERNEL);
+ if (!pgdir->db_page) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
+ struct mlx4_ib_db *db, int order)
+{
+ int o;
+ int i;
+
+ for (o = order; o <= 1; ++o) {
+ i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
+ if (i < MLX4_IB_DB_PER_PAGE >> o)
+ goto found;
+ }
+
+ return -ENOMEM;
+
+found:
+ clear_bit(i, pgdir->bits[o]);
+
+ i <<= o;
+
+ if (o > order)
+ set_bit(i ^ 1, pgdir->bits[order]);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ db->db = pgdir->db_page + db->index;
+ db->dma = pgdir->db_dma + db->index * 4;
+ db->order = order;
+
+ return 0;
+}
+
+int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
+{
+ struct mlx4_ib_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&dev->pgdir_mutex);
+
+ list_for_each_entry(pgdir, &dev->pgdir_list, list)
+ if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
+ goto out;
+
+ pgdir = mlx4_ib_alloc_db_pgdir(dev);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &dev->pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+ mutex_unlock(&dev->pgdir_mutex);
+
+ return ret;
+}
+
+void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
+{
+ int o;
+ int i;
+
+ mutex_lock(&dev->pgdir_mutex);
+
+ o = db->order;
+ i = db->index;
+
+ if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+ clear_bit(i ^ 1, db->u.pgdir->order0);
+ ++o;
+ }
+
+ i >>= o;
+ set_bit(i, db->u.pgdir->bits[o]);
+
+ if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
+ dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
+ db->u.pgdir->db_page, db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&dev->pgdir_mutex);
+}
+
+struct mlx4_ib_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ int refcnt;
+};
+
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+ struct mlx4_ib_db *db)
+{
+ struct mlx4_ib_user_db_page *page;
+ struct ib_umem_chunk *chunk;
+ int err = 0;
+
+ mutex_lock(&context->db_page_mutex);
+
+ list_for_each_entry(page, &context->db_page_list, list)
+ if (page->user_virt == (virt & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof *page, GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ page->user_virt = (virt & PAGE_MASK);
+ page->refcnt = 0;
+ page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ err = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->db_page_list);
+
+found:
+ chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
+ db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->u.user_page = page;
+ ++page->refcnt;
+
+out:
+ mutex_unlock(&context->db_page_mutex);
+
+ return err;
+}
+
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
+{
+ mutex_lock(&context->db_page_mutex);
+
+ if (!--db->u.user_page->refcnt) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->db_page_mutex);
+}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
new file mode 100644
index 00000000000..333091787c5
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_ib.h"
+
+enum {
+ MLX4_IB_VENDOR_CLASS1 = 0x9,
+ MLX4_IB_VENDOR_CLASS2 = 0xa
+};
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad)
+{
+ struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+ void *inbox;
+ int err;
+ u32 in_modifier = port;
+ u8 op_modifier = 0;
+
+ inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+ inbox = inmailbox->buf;
+
+ outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(outmailbox)) {
+ mlx4_free_cmd_mailbox(dev->dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ memcpy(inbox, in_mad, 256);
+
+ /*
+ * Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ if (in_wc) {
+ struct {
+ __be32 my_qpn;
+ u32 reserved1;
+ __be32 rqpn;
+ u8 sl;
+ u8 g_path;
+ u16 reserved2[2];
+ __be16 pkey;
+ u32 reserved3[11];
+ u8 grh[40];
+ } *ext_info;
+
+ memset(inbox + 256, 0, 256);
+ ext_info = inbox + 256;
+
+ ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
+ ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
+ ext_info->sl = in_wc->sl << 4;
+ ext_info->g_path = in_wc->dlid_path_bits |
+ (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
+ ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
+
+ if (in_grh)
+ memcpy(ext_info->grh, in_grh, 40);
+
+ op_modifier |= 0x4;
+
+ in_modifier |= in_wc->slid << 16;
+ }
+
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
+ in_modifier, op_modifier,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+
+ if (!err);
+ memcpy(response_mad, outmailbox->buf, 256);
+
+ mlx4_free_cmd_mailbox(dev->dev, inmailbox);
+ mlx4_free_cmd_mailbox(dev->dev, outmailbox);
+
+ return err;
+}
+
+static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
+{
+ struct ib_ah *new_ah;
+ struct ib_ah_attr ah_attr;
+
+ if (!dev->send_agent[port_num - 1][0])
+ return;
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = lid;
+ ah_attr.sl = sl;
+ ah_attr.port_num = port_num;
+
+ new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
+ if (IS_ERR(new_ah))
+ return;
+
+ spin_lock(&dev->sm_lock);
+ if (dev->sm_ah[port_num - 1])
+ ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ dev->sm_ah[port_num - 1] = new_ah;
+ spin_unlock(&dev->sm_lock);
+}
+
+/*
+ * Snoop SM MADs for port info and P_Key table sets, so we can
+ * synthesize LID change and P_Key change events.
+ */
+static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
+{
+ struct ib_event event;
+
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+ struct ib_port_info *pinfo =
+ (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+
+ update_sm_ah(to_mdev(ibdev), port_num,
+ be16_to_cpu(pinfo->sm_lid),
+ pinfo->neighbormtu_mastersmsl & 0xf);
+
+ event.device = ibdev;
+ event.element.port_num = port_num;
+
+ if(pinfo->clientrereg_resv_subnetto & 0x80)
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ else
+ event.event = IB_EVENT_LID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ event.device = ibdev;
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+ }
+ }
+}
+
+static void node_desc_override(struct ib_device *dev,
+ struct ib_mad *mad)
+{
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+ mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+ spin_lock(&to_mdev(dev)->sm_lock);
+ memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+ spin_unlock(&to_mdev(dev)->sm_lock);
+ }
+}
+
+static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
+{
+ int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
+ int ret;
+
+ if (agent) {
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ /*
+ * We rely here on the fact that MLX QPs don't use the
+ * address handle after the send is posted (this is
+ * wrong following the IB spec strictly, but we know
+ * it's OK for our devices).
+ */
+ spin_lock(&dev->sm_lock);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
+ else
+ ret = -EINVAL;
+ spin_unlock(&dev->sm_lock);
+
+ if (ret)
+ ib_free_send_mad(send_buf);
+ }
+}
+
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ u16 slid;
+ int err;
+
+ slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
+ forward_trap(to_mdev(ibdev), port_num, in_mad);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ }
+
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /*
+ * Don't process SMInfo queries or vendor-specific
+ * MADs -- the SMA can't handle them.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ IB_SMP_ATTR_VENDOR_MASK))
+ return IB_MAD_RESULT_SUCCESS;
+ } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
+ in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } else
+ return IB_MAD_RESULT_SUCCESS;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev),
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ if (!out_mad->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in_mad);
+ node_desc_override(ibdev, out_mad);
+ }
+
+ /* set return bit in status of directed route responses */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+ int ret;
+
+ for (p = 0; p < dev->dev->caps.num_ports; ++p)
+ for (q = 0; q <= 1; ++q) {
+ agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
+ q ? IB_QPT_GSI : IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+ dev->send_agent[p][q] = agent;
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < dev->dev->caps.num_ports; ++p)
+ for (q = 0; q <= 1; ++q)
+ if (dev->send_agent[p][q])
+ ib_unregister_mad_agent(dev->send_agent[p][q]);
+
+ return ret;
+}
+
+void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+
+ for (p = 0; p < dev->dev->caps.num_ports; ++p) {
+ for (q = 0; q <= 1; ++q) {
+ agent = dev->send_agent[p][q];
+ dev->send_agent[p][q] = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+
+ if (dev->sm_ah[p])
+ ib_destroy_ah(dev->sm_ah[p]);
+ }
+}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
new file mode 100644
index 00000000000..402f3a20ec0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+#define DRV_NAME "mlx4_ib"
+#define DRV_VERSION "0.01"
+#define DRV_RELDATE "May 1, 2006"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const char mlx4_ib_version[] __devinitdata =
+ DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static void init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = 1;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
+
+static int mlx4_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ props->fw_ver = dev->dev->caps.fw_ver;
+ props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
+ IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
+ props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
+ props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+
+ props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
+ 0xffffff;
+ props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
+
+ props->max_mr_size = ~0ull;
+ props->page_size_cap = dev->dev->caps.page_size_cap;
+ props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp_wr = dev->dev->caps.max_wqes;
+ props->max_sge = min(dev->dev->caps.max_sq_sg,
+ dev->dev->caps.max_rq_sg);
+ props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cqe = dev->dev->caps.max_cqes;
+ props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
+ props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
+ props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq_wr = dev->dev->caps.max_srq_wqes;
+ props->max_srq_sge = dev->dev->caps.max_srq_sge;
+ props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
+ props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = dev->dev->caps.pkey_table_len;
+ props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
+ props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+ props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
+ props->lmc = out_mad->data[34] & 0x7;
+ props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
+ props->sm_sl = out_mad->data[36] & 0xf;
+ props->state = out_mad->data[32] & 0xf;
+ props->phys_state = out_mad->data[33] >> 4;
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len;
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw, out_mad->data + 8, 8);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props)
+{
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock(&to_mdev(ibdev)->sm_lock);
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ spin_unlock(&to_mdev(ibdev)->sm_lock);
+ }
+
+ return 0;
+}
+
+static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, 256);
+ *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
+ ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
+
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B);
+
+ mlx4_free_cmd_mailbox(dev->dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct ib_port_attr attr;
+ u32 cap_mask;
+ int err;
+
+ mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+
+ err = mlx4_ib_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mlx4_SET_PORT(to_mdev(ibdev), port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
+
+out:
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ return err;
+}
+
+static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp resp;
+ int err;
+
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ INIT_LIST_HEAD(&context->db_page_list);
+ mutex_init(&context->db_page_mutex);
+
+ err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (err) {
+ mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+}
+
+static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
+
+ mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
+ kfree(context);
+
+ return 0;
+}
+
+static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->device);
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_pgoff == 0) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+ } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ /* FIXME want pgprot_writecombine() for BlueFlame pages */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_pd *pd;
+ int err;
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ if (context)
+ if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
+ mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &pd->ibpd;
+}
+
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
+{
+ mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+ kfree(pd);
+
+ return 0;
+}
+
+static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
+ &to_mqp(ibqp)->mqp, gid->raw);
+}
+
+static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
+ &to_mqp(ibqp)->mqp, gid->raw);
+}
+
+static int init_node_data(struct mlx4_ib_dev *dev)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static void *mlx4_ib_add(struct mlx4_dev *dev)
+{
+ struct mlx4_ib_dev *ibdev;
+
+ ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
+ if (!ibdev) {
+ dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
+ return NULL;
+ }
+
+ if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
+ goto err_dealloc;
+
+ if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
+ goto err_pd;
+
+ ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!ibdev->uar_map)
+ goto err_uar;
+ MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
+
+ INIT_LIST_HEAD(&ibdev->pgdir_list);
+ mutex_init(&ibdev->pgdir_mutex);
+
+ ibdev->dev = dev;
+
+ strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
+ ibdev->ib_dev.owner = THIS_MODULE;
+ ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
+ ibdev->ib_dev.num_comp_vectors = 1;
+ ibdev->ib_dev.dma_device = &dev->pdev->dev;
+
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ ibdev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ ibdev->ib_dev.query_device = mlx4_ib_query_device;
+ ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
+ ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
+ ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
+ ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
+ ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
+ ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
+ ibdev->ib_dev.mmap = mlx4_ib_mmap;
+ ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
+ ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
+ ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
+ ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
+ ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
+ ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
+ ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
+ ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
+ ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
+ ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
+ ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
+ ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
+ ibdev->ib_dev.post_send = mlx4_ib_post_send;
+ ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
+ ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
+ ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
+ ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
+ ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
+ ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
+ ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
+ ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
+ ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
+ ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
+ ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+
+ if (init_node_data(ibdev))
+ goto err_map;
+
+ spin_lock_init(&ibdev->sm_lock);
+ mutex_init(&ibdev->cap_mask_mutex);
+
+ if (ib_register_device(&ibdev->ib_dev))
+ goto err_map;
+
+ if (mlx4_ib_mad_init(ibdev))
+ goto err_reg;
+
+ return ibdev;
+
+err_reg:
+ ib_unregister_device(&ibdev->ib_dev);
+
+err_map:
+ iounmap(ibdev->uar_map);
+
+err_uar:
+ mlx4_uar_free(dev, &ibdev->priv_uar);
+
+err_pd:
+ mlx4_pd_free(dev, ibdev->priv_pdn);
+
+err_dealloc:
+ ib_dealloc_device(&ibdev->ib_dev);
+
+ return NULL;
+}
+
+static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
+{
+ struct mlx4_ib_dev *ibdev = ibdev_ptr;
+ int p;
+
+ for (p = 1; p <= dev->caps.num_ports; ++p)
+ mlx4_CLOSE_PORT(dev, p);
+
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ iounmap(ibdev->uar_map);
+ mlx4_uar_free(dev, &ibdev->priv_uar);
+ mlx4_pd_free(dev, ibdev->priv_pdn);
+ ib_dealloc_device(&ibdev->ib_dev);
+}
+
+static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
+ enum mlx4_dev_event event, int subtype,
+ int port)
+{
+ struct ib_event ibev;
+
+ switch (event) {
+ case MLX4_EVENT_TYPE_PORT_CHANGE:
+ ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ break;
+
+ case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
+ ibev.event = IB_EVENT_DEVICE_FATAL;
+ break;
+
+ default:
+ return;
+ }
+
+ ibev.device = ibdev_ptr;
+ ibev.element.port_num = port;
+
+ ib_dispatch_event(&ibev);
+}
+
+static struct mlx4_interface mlx4_ib_interface = {
+ .add = mlx4_ib_add,
+ .remove = mlx4_ib_remove,
+ .event = mlx4_ib_event
+};
+
+static int __init mlx4_ib_init(void)
+{
+ return mlx4_register_interface(&mlx4_ib_interface);
+}
+
+static void __exit mlx4_ib_cleanup(void)
+{
+ mlx4_unregister_interface(&mlx4_ib_interface);
+}
+
+module_init(mlx4_ib_init);
+module_exit(mlx4_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
new file mode 100644
index 00000000000..93dac71f323
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_IB_H
+#define MLX4_IB_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/doorbell.h>
+
+enum {
+ MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
+};
+
+struct mlx4_ib_db_pgdir;
+struct mlx4_ib_user_db_page;
+
+struct mlx4_ib_db {
+ __be32 *db;
+ union {
+ struct mlx4_ib_db_pgdir *pgdir;
+ struct mlx4_ib_user_db_page *user_page;
+ } u;
+ dma_addr_t dma;
+ int index;
+ int order;
+};
+
+struct mlx4_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mlx4_uar uar;
+ struct list_head db_page_list;
+ struct mutex db_page_mutex;
+};
+
+struct mlx4_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
+struct mlx4_ib_cq_buf {
+ struct mlx4_buf buf;
+ struct mlx4_mtt mtt;
+};
+
+struct mlx4_ib_cq {
+ struct ib_cq ibcq;
+ struct mlx4_cq mcq;
+ struct mlx4_ib_cq_buf buf;
+ struct mlx4_ib_db db;
+ spinlock_t lock;
+ struct ib_umem *umem;
+};
+
+struct mlx4_ib_mr {
+ struct ib_mr ibmr;
+ struct mlx4_mr mmr;
+ struct ib_umem *umem;
+};
+
+struct mlx4_ib_wq {
+ u64 *wrid;
+ spinlock_t lock;
+ int max;
+ int max_gs;
+ int offset;
+ int wqe_shift;
+ unsigned head;
+ unsigned tail;
+};
+
+struct mlx4_ib_qp {
+ struct ib_qp ibqp;
+ struct mlx4_qp mqp;
+ struct mlx4_buf buf;
+
+ struct mlx4_ib_db db;
+ struct mlx4_ib_wq rq;
+
+ u32 doorbell_qpn;
+ __be32 sq_signal_bits;
+ struct mlx4_ib_wq sq;
+
+ struct ib_umem *umem;
+ struct mlx4_mtt mtt;
+ int buf_size;
+ struct mutex mutex;
+ u8 port;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+ u8 state;
+};
+
+struct mlx4_ib_srq {
+ struct ib_srq ibsrq;
+ struct mlx4_srq msrq;
+ struct mlx4_buf buf;
+ struct mlx4_ib_db db;
+ u64 *wrid;
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct ib_umem *umem;
+ struct mlx4_mtt mtt;
+ struct mutex mutex;
+};
+
+struct mlx4_ib_ah {
+ struct ib_ah ibah;
+ struct mlx4_av av;
+};
+
+struct mlx4_ib_dev {
+ struct ib_device ib_dev;
+ struct mlx4_dev *dev;
+ void __iomem *uar_map;
+
+ struct list_head pgdir_list;
+ struct mutex pgdir_mutex;
+
+ struct mlx4_uar priv_uar;
+ u32 priv_pdn;
+ MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
+
+ struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
+ struct ib_ah *sm_ah[MLX4_MAX_PORTS];
+ spinlock_t sm_lock;
+
+ struct mutex cap_mask_mutex;
+};
+
+static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+}
+
+static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
+}
+
+static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct mlx4_ib_pd, ibpd);
+}
+
+static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct mlx4_ib_cq, ibcq);
+}
+
+static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
+{
+ return container_of(mcq, struct mlx4_ib_cq, mcq);
+}
+
+static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct mlx4_ib_mr, ibmr);
+}
+
+static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct mlx4_ib_qp, ibqp);
+}
+
+static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
+{
+ return container_of(mqp, struct mlx4_ib_qp, mqp);
+}
+
+static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
+}
+
+static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
+{
+ return container_of(msrq, struct mlx4_ib_srq, msrq);
+}
+
+static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct mlx4_ib_ah, ibah);
+}
+
+int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
+void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+ struct mlx4_ib_db *db);
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
+
+struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
+int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
+ struct ib_umem *umem);
+struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int mlx4_ib_dereg_mr(struct ib_mr *mr);
+
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_cq(struct ib_cq *cq);
+int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
+void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx4_ib_destroy_ah(struct ib_ah *ah);
+
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx4_ib_destroy_srq(struct ib_srq *srq);
+void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_qp(struct ib_qp *qp);
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad);
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
+void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
+
+static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
+{
+ return !!(ah->av.g_slid & 0x80);
+}
+
+#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
new file mode 100644
index 00000000000..85ae906f1d1
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4_ib.h"
+
+static u32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
+ MLX4_PERM_LOCAL_READ;
+}
+
+struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct mlx4_ib_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
+ ~0ull, convert_access(acc), 0, 0, &mr->mmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
+ struct ib_umem *umem)
+{
+ u64 *pages;
+ struct ib_umem_chunk *chunk;
+ int i, j, k;
+ int n;
+ int len;
+ int err = 0;
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(&chunk->page_list[j]) +
+ umem->page_size * k;
+ /*
+ * Be friendly to WRITE_MTT firmware
+ * command, and pass it chunks of
+ * appropriate size.
+ */
+ if (i == PAGE_SIZE / sizeof (u64) - 2) {
+ err = mlx4_write_mtt(dev->dev, mtt, n,
+ i, pages);
+ if (err)
+ goto out;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+
+out:
+ free_page((unsigned long) pages);
+ return err;
+}
+
+struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mr *mr;
+ int shift;
+ int err;
+ int n;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err_free;
+ }
+
+ n = ib_umem_page_count(mr->umem);
+ shift = ilog2(mr->umem->page_size);
+
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+ convert_access(access_flags), n, shift, &mr->mmr);
+ if (err)
+ goto err_umem;
+
+ err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+ if (err)
+ goto err_mr;
+
+ err = mlx4_mr_enable(dev->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+
+ return &mr->ibmr;
+
+err_mr:
+ mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+ mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
new file mode 100644
index 00000000000..dc137dec230
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -0,0 +1,1386 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pack.h>
+
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+enum {
+ MLX4_IB_ACK_REQ_FREQ = 8,
+};
+
+enum {
+ MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+};
+
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate data.
+ */
+ MLX4_IB_UD_HEADER_SIZE = 72
+};
+
+struct mlx4_ib_sqp {
+ struct mlx4_ib_qp qp;
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+};
+
+static const __be32 mlx4_ib_opcode[] = {
+ [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
+ [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+ [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+ [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+ [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+ [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+};
+
+static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
+{
+ return container_of(mqp, struct mlx4_ib_sqp, qp);
+}
+
+static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
+ qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
+}
+
+static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
+ qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
+}
+
+static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
+{
+ if (qp->buf.nbufs == 1)
+ return qp->buf.u.direct.buf + offset;
+ else
+ return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
+ (offset & (PAGE_SIZE - 1));
+}
+
+static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+}
+
+static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
+}
+
+static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+
+ if (type == MLX4_EVENT_TYPE_PATH_MIG)
+ to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+
+ if (ibqp->event_handler) {
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (type) {
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX4_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on QP %06x\n", type, qp->qpn);
+ return;
+ }
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+static int send_wqe_overhead(enum ib_qp_type type)
+{
+ /*
+ * UD WQEs must have a datagram segment.
+ * RC and UC WQEs might have a remote address segment.
+ * MLX WQEs need two extra inline data segments (for the UD
+ * header and space for the ICRC).
+ */
+ switch (type) {
+ case IB_QPT_UD:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg);
+ case IB_QPT_UC:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg);
+ case IB_QPT_RC:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_atomic_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg);
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ ALIGN(MLX4_IB_UD_HEADER_SIZE +
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg)) +
+ ALIGN(4 +
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg));
+ default:
+ return sizeof (struct mlx4_wqe_ctrl_seg);
+ }
+}
+
+static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+ struct mlx4_ib_qp *qp)
+{
+ /* Sanity check RQ size before proceeding */
+ if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
+ cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+ return -EINVAL;
+
+ qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
+
+ qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
+ sizeof (struct mlx4_wqe_data_seg)));
+ qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
+
+ cap->max_recv_wr = qp->rq.max;
+ cap->max_recv_sge = qp->rq.max_gs;
+
+ return 0;
+}
+
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+ enum ib_qp_type type, struct mlx4_ib_qp *qp)
+{
+ /* Sanity check SQ size before proceeding */
+ if (cap->max_send_wr > dev->dev->caps.max_wqes ||
+ cap->max_send_sge > dev->dev->caps.max_sq_sg ||
+ cap->max_inline_data + send_wqe_overhead(type) +
+ sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
+ return -EINVAL;
+
+ /*
+ * For MLX transport we need 2 extra S/G entries:
+ * one for the header and one for the checksum at the end
+ */
+ if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
+ cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
+ return -EINVAL;
+
+ qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
+
+ qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
+ sizeof (struct mlx4_wqe_data_seg),
+ cap->max_inline_data +
+ sizeof (struct mlx4_wqe_inline_seg)) +
+ send_wqe_overhead(type)));
+ qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
+ sizeof (struct mlx4_wqe_data_seg);
+
+ qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+ (qp->sq.max << qp->sq.wqe_shift);
+ if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+ qp->rq.offset = 0;
+ qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
+ } else {
+ qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
+ qp->sq.offset = 0;
+ }
+
+ cap->max_send_wr = qp->sq.max;
+ cap->max_send_sge = qp->sq.max_gs;
+ cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
+ sizeof (struct mlx4_wqe_inline_seg);
+
+ return 0;
+}
+
+static int set_user_sq_size(struct mlx4_ib_qp *qp,
+ struct mlx4_ib_create_qp *ucmd)
+{
+ qp->sq.max = 1 << ucmd->log_sq_bb_count;
+ qp->sq.wqe_shift = ucmd->log_sq_stride;
+
+ qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
+ (qp->sq.max << qp->sq.wqe_shift);
+
+ return 0;
+}
+
+static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
+{
+ int err;
+
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ qp->state = IB_QPS_RESET;
+ qp->atomic_rd_en = 0;
+ qp->resp_depth = 0;
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+
+ err = set_rq_size(dev, &init_attr->cap, qp);
+ if (err)
+ goto err;
+
+ if (pd->uobject) {
+ struct mlx4_ib_create_qp ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ err = set_user_sq_size(qp, &ucmd);
+ if (err)
+ goto err;
+
+ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ qp->buf_size, 0);
+ if (IS_ERR(qp->umem)) {
+ err = PTR_ERR(qp->umem);
+ goto err;
+ }
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
+ ilog2(qp->umem->page_size), &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+ if (err)
+ goto err_mtt;
+
+ if (!init_attr->srq) {
+ err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+ ucmd.db_addr, &qp->db);
+ if (err)
+ goto err_mtt;
+ }
+ } else {
+ err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+ if (err)
+ goto err;
+
+ if (!init_attr->srq) {
+ err = mlx4_ib_db_alloc(dev, &qp->db, 0);
+ if (err)
+ goto err;
+
+ *qp->db.db = 0;
+ }
+
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+
+ err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
+ &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ if (err)
+ goto err_mtt;
+
+ qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
+ qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
+
+ if (!qp->sq.wrid || !qp->rq.wrid) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+
+ /* We don't support inline sends for kernel QPs (yet) */
+ init_attr->cap.max_inline_data = 0;
+ }
+
+ err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
+ if (err)
+ goto err_wrid;
+
+ /*
+ * Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+ else
+ qp->sq_signal_bits = 0;
+
+ qp->mqp.event = mlx4_ib_qp_event;
+
+ return 0;
+
+err_wrid:
+ if (pd->uobject && !init_attr->srq)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
+ else {
+ kfree(qp->sq.wrid);
+ kfree(qp->rq.wrid);
+ }
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+
+err_buf:
+ if (pd->uobject)
+ ib_umem_release(qp->umem);
+ else
+ mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+
+err_db:
+ if (!pd->uobject && !init_attr->srq)
+ mlx4_ib_db_free(dev, &qp->db);
+
+err:
+ return err;
+}
+
+static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET: return MLX4_QP_STATE_RST;
+ case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
+ case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
+ case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
+ case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
+ case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
+ case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
+ default: return -1;
+ }
+}
+
+static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_lock_irq(&send_cq->lock);
+ else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_unlock_irq(&send_cq->lock);
+ else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ int is_user)
+{
+ struct mlx4_ib_cq *send_cq, *recv_cq;
+
+ if (qp->state != IB_QPS_RESET)
+ if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
+ MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
+ printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
+ qp->mqp.qpn);
+
+ send_cq = to_mcq(qp->ibqp.send_cq);
+ recv_cq = to_mcq(qp->ibqp.recv_cq);
+
+ mlx4_ib_lock_cqs(send_cq, recv_cq);
+
+ if (!is_user) {
+ __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
+ if (send_cq != recv_cq)
+ __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ }
+
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+
+ mlx4_ib_unlock_cqs(send_cq, recv_cq);
+
+ mlx4_qp_free(dev->dev, &qp->mqp);
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+
+ if (is_user) {
+ if (!qp->ibqp.srq)
+ mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
+ &qp->db);
+ ib_umem_release(qp->umem);
+ } else {
+ kfree(qp->sq.wrid);
+ kfree(qp->rq.wrid);
+ mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+ if (!qp->ibqp.srq)
+ mlx4_ib_db_free(dev, &qp->db);
+ }
+}
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_qp *qp;
+ int err;
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ {
+ qp = kmalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ break;
+ }
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ {
+ /* Userspace is not allowed to create special QPs: */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
+ sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
+ if (!sqp)
+ return ERR_PTR(-ENOMEM);
+
+ qp = &sqp->qp;
+
+ err = create_qp_common(dev, pd, init_attr, udata,
+ dev->dev->caps.sqp_start +
+ (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+ init_attr->port_num - 1,
+ qp);
+ if (err) {
+ kfree(sqp);
+ return ERR_PTR(err);
+ }
+
+ qp->port = init_attr->port_num;
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+
+ break;
+ }
+ default:
+ /* Don't support raw QPs */
+ return ERR_PTR(-EINVAL);
+ }
+
+ return &qp->ibqp;
+}
+
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (is_qp0(dev, mqp))
+ mlx4_CLOSE_PORT(dev->dev, mqp->port);
+
+ destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+
+ if (is_sqp(dev, mqp))
+ kfree(to_msqp(mqp));
+ else
+ kfree(mqp);
+
+ return 0;
+}
+
+static void init_port(struct mlx4_ib_dev *dev, int port)
+{
+ struct mlx4_init_port_param param;
+ int err;
+
+ memset(&param, 0, sizeof param);
+
+ param.port_width_cap = dev->dev->caps.port_width_cap;
+ param.vl_cap = dev->dev->caps.vl_cap;
+ param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
+ param.max_gid = dev->dev->caps.gid_table_len;
+ param.max_pkey = dev->dev->caps.pkey_table_len;
+
+ err = mlx4_INIT_PORT(dev->dev, &param, port);
+ if (err)
+ printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
+}
+
+static int to_mlx4_st(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_RC: return MLX4_QP_ST_RC;
+ case IB_QPT_UC: return MLX4_QP_ST_UC;
+ case IB_QPT_UD: return MLX4_QP_ST_UD;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI: return MLX4_QP_ST_MLX;
+ default: return -1;
+ }
+}
+
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+ u32 hw_access_flags = 0;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ dest_rd_atomic = attr->max_dest_rd_atomic;
+ else
+ dest_rd_atomic = qp->resp_depth;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ access_flags = attr->qp_access_flags;
+ else
+ access_flags = qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ hw_access_flags |= MLX4_QP_BIT_RRE;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ hw_access_flags |= MLX4_QP_BIT_RAE;
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ hw_access_flags |= MLX4_QP_BIT_RWE;
+
+ return cpu_to_be32(hw_access_flags);
+}
+
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ sqp->pkey_index = attr->pkey_index;
+ if (attr_mask & IB_QP_QKEY)
+ sqp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_SQ_PSN)
+ sqp->send_psn = attr->sq_psn;
+}
+
+static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
+{
+ path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
+}
+
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+ struct mlx4_qp_path *path, u8 port)
+{
+ path->grh_mylmc = ah->src_path_bits & 0x7f;
+ path->rlid = cpu_to_be16(ah->dlid);
+ if (ah->static_rate) {
+ path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
+ --path->static_rate;
+ } else
+ path->static_rate = 0;
+ path->counter_index = 0xff;
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
+ printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
+ return -1;
+ }
+
+ path->grh_mylmc |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->tclass_flowlabel =
+ cpu_to_be32((ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ }
+
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+
+ return 0;
+}
+
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_qp_context *context;
+ enum mlx4_qp_optpar optpar = 0;
+ int sqd_event;
+ int err = -EINVAL;
+
+ context = kzalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
+ (to_mlx4_st(ibqp->qp_type) << 16));
+ context->flags |= cpu_to_be32(1 << 8); /* DE? */
+
+ if (!(attr_mask & IB_QP_PATH_MIG_STATE))
+ context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
+ else {
+ optpar |= MLX4_QP_OPTPAR_PM_STATE;
+ switch (attr->path_mig_state) {
+ case IB_MIG_MIGRATED:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
+ break;
+ case IB_MIG_REARM:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
+ break;
+ case IB_MIG_ARMED:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
+ break;
+ }
+ }
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
+ ibqp->qp_type == IB_QPT_UD)
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
+ printk(KERN_ERR "path MTU (%u) is invalid\n",
+ attr->path_mtu);
+ return -EINVAL;
+ }
+ context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+ }
+
+ if (qp->rq.max)
+ context->rq_size_stride = ilog2(qp->rq.max) << 3;
+ context->rq_size_stride |= qp->rq.wqe_shift - 4;
+
+ if (qp->sq.max)
+ context->sq_size_stride = ilog2(qp->sq.max) << 3;
+ context->sq_size_stride |= qp->sq.wqe_shift - 4;
+
+ if (qp->ibqp.uobject)
+ context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+ else
+ context->usr_page = cpu_to_be32(dev->priv_uar.index);
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
+
+ if (attr_mask & IB_QP_PORT) {
+ if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
+ !(attr_mask & IB_QP_AV)) {
+ mlx4_set_sched(&context->pri_path, attr->port_num);
+ optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ context->pri_path.pkey_index = attr->pkey_index;
+ optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+ optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
+ }
+
+ if (attr_mask & IB_QP_AV) {
+ if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
+ MLX4_QP_OPTPAR_SCHED_QUEUE);
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ context->pri_path.ackto = attr->timeout << 3;
+ optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
+ return -EINVAL;
+
+ if (attr->alt_port_num == 0 ||
+ attr->alt_port_num > dev->dev->caps.num_ports)
+ return -EINVAL;
+
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ attr->alt_port_num))
+ return -EINVAL;
+
+ context->alt_path.pkey_index = attr->alt_pkey_index;
+ context->alt_path.ackto = attr->alt_timeout << 3;
+ optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
+ }
+
+ context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic)
+ context->params1 |=
+ cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+ optpar |= MLX4_QP_OPTPAR_SRA_MAX;
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN)
+ context->next_send_psn = cpu_to_be32(attr->sq_psn);
+
+ context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic)
+ context->params2 |=
+ cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+ optpar |= MLX4_QP_OPTPAR_RRA_MAX;
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
+ optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
+ }
+
+ if (ibqp->srq)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
+ }
+ if (attr_mask & IB_QP_RQ_PSN)
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+ context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
+
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey = cpu_to_be32(attr->qkey);
+ optpar |= MLX4_QP_OPTPAR_Q_KEY;
+ }
+
+ if (ibqp->srq)
+ context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
+
+ if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->db_rec_addr = cpu_to_be64(qp->db.dma);
+
+ if (cur_state == IB_QPS_INIT &&
+ new_state == IB_QPS_RTR &&
+ (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
+ ibqp->qp_type == IB_QPT_UD)) {
+ context->pri_path.sched_queue = (qp->port - 1) << 6;
+ if (is_qp0(dev, qp))
+ context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
+ else
+ context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ }
+
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
+ sqd_event = 1;
+ else
+ sqd_event = 0;
+
+ /*
+ * Before passing a kernel QP to the HW, make sure that the
+ * ownership bits of the send queue are set so that the
+ * hardware doesn't start processing stale work requests.
+ */
+ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ int i;
+
+ for (i = 0; i < qp->sq.max; ++i) {
+ ctrl = get_send_wqe(qp, i);
+ ctrl->owner_opcode = cpu_to_be32(1 << 31);
+ }
+ }
+
+ err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
+ to_mlx4_state(new_state), context, optpar,
+ sqd_event, &qp->mqp);
+ if (err)
+ goto out;
+
+ qp->state = new_state;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_port = attr->alt_port_num;
+
+ if (is_sqp(dev, qp))
+ store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+
+ /*
+ * If we moved QP0 to RTR, bring the IB link up; if we moved
+ * QP0 to RESET or ERROR, bring the link back down.
+ */
+ if (is_qp0(dev, qp)) {
+ if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
+ init_port(dev, qp->port);
+
+ if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ mlx4_CLOSE_PORT(dev->dev, qp->port);
+ }
+
+ /*
+ * If we moved a kernel QP to RESET, clean up all old CQ
+ * entries and reinitialize the QP.
+ */
+ if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq): NULL);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ if (!ibqp->srq)
+ *qp->db.db = 0;
+ }
+
+out:
+ kfree(context);
+ return err;
+}
+
+static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
+static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+};
+
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+
+ mutex_lock(&qp->mutex);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->dev->caps.pkey_table_len) {
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+ err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
+ mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
+ IB_QPS_RESET, IB_QPS_INIT);
+ if (err)
+ goto out;
+ cur_state = IB_QPS_INIT;
+ }
+
+ err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+ void *wqe)
+{
+ struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ u16 pkey;
+ int send_size;
+ int header_size;
+ int i;
+
+ send_size = 0;
+ for (i = 0; i < wr->num_sge; ++i)
+ send_size += wr->sg_list[i].length;
+
+ ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
+
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid = ah->av.dlid;
+ sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
+ if (mlx4_ib_ah_grh_present(ah)) {
+ sqp->ud_header.grh.traffic_class =
+ (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.grh.flow_label =
+ ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
+ ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
+ ah->av.gid_index, &sqp->ud_header.grh.source_gid);
+ memcpy(sqp->ud_header.grh.destination_gid.raw,
+ ah->av.dgid, 16);
+ }
+
+ mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.service_level << 8));
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.immediate_present = 1;
+ sqp->ud_header.immediate_data = wr->imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+ sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ if (!sqp->qp.ibqp.qp_num)
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+ else
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+
+ header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+
+ if (0) {
+ printk(KERN_ERR "built UD header of size %d:\n", header_size);
+ for (i = 0; i < header_size / 4; ++i) {
+ if (i % 8 == 0)
+ printk(" [%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ printk("\n");
+ }
+
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ memcpy(inl + 1, sqp->header_buf, header_size);
+
+ return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+}
+
+static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+ unsigned cur;
+ struct mlx4_ib_cq *cq;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max;
+}
+
+int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ unsigned long flags;
+ int nreq;
+ int err = 0;
+ int ind;
+ int size;
+ int i;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->sq.head;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
+ qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
+
+ ctrl->srcrb_flags =
+ (wr->send_flags & IB_SEND_SIGNALED ?
+ cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+ qp->sq_signal_bits;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ ctrl->imm = wr->imm_data;
+ else
+ ctrl->imm = 0;
+
+ wqe += sizeof *ctrl;
+ size = sizeof *ctrl / 16;
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.atomic.remote_addr);
+ ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.atomic.rkey);
+ ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
+
+ wqe += sizeof (struct mlx4_wqe_raddr_seg);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.swap);
+ ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
+ cpu_to_be64(wr->wr.atomic.compare_add);
+ ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
+ }
+
+ wqe += sizeof (struct mlx4_wqe_atomic_seg);
+ size += (sizeof (struct mlx4_wqe_raddr_seg) +
+ sizeof (struct mlx4_wqe_atomic_seg)) / 16;
+
+ break;
+
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
+ cpu_to_be64(wr->wr.rdma.remote_addr);
+ ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
+ cpu_to_be32(wr->wr.rdma.rkey);
+ ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
+
+ wqe += sizeof (struct mlx4_wqe_raddr_seg);
+ size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
+
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+ break;
+
+ case IB_QPT_UD:
+ memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
+ &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+ ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
+ cpu_to_be32(wr->wr.ud.remote_qpn);
+ ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
+ cpu_to_be32(wr->wr.ud.remote_qkey);
+
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ break;
+
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ err = build_mlx_header(to_msqp(qp), wr, ctrl);
+ if (err < 0) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += err;
+ size += err / 16;
+
+ err = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
+ cpu_to_be32(wr->sg_list[i].length);
+ ((struct mlx4_wqe_data_seg *) wqe)->lkey =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ ((struct mlx4_wqe_data_seg *) wqe)->addr =
+ cpu_to_be64(wr->sg_list[i].addr);
+
+ wqe += sizeof (struct mlx4_wqe_data_seg);
+ size += sizeof (struct mlx4_wqe_data_seg) / 16;
+ }
+
+ /* Add one more inline data segment for ICRC for MLX sends */
+ if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
+ ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
+ cpu_to_be32((1 << 31) | 4);
+ ((u32 *) wqe)[1] = 0;
+ wqe += sizeof (struct mlx4_wqe_data_seg);
+ size += sizeof (struct mlx4_wqe_data_seg) / 16;
+ }
+
+ ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
+ MLX4_WQE_CTRL_FENCE : 0) | size;
+
+ /*
+ * Make sure descriptor is fully written before
+ * setting ownership bit (because HW can start
+ * executing as soon as we do).
+ */
+ wmb();
+
+ if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
+ (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
+
+ ++ind;
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ writel(qp->doorbell_qpn,
+ to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
+
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ mmiowb();
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
+
+int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_wqe_data_seg *scat;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->rq.head & (qp->rq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = get_recv_wqe(qp, ind);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
+ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
+ }
+
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.max - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
new file mode 100644
index 00000000000..12fac1c8989
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/srq.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+static void *get_wqe(struct mlx4_ib_srq *srq, int n)
+{
+ int offset = n << srq->msrq.wqe_shift;
+
+ if (srq->buf.nbufs == 1)
+ return srq->buf.u.direct.buf + offset;
+ else
+ return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
+ (offset & (PAGE_SIZE - 1));
+}
+
+static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (type) {
+ case MLX4_EVENT_TYPE_SRQ_LIMIT:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on SRQ %06x\n", type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_srq *srq;
+ struct mlx4_wqe_srq_next_seg *next;
+ int desc_size;
+ int buf_size;
+ int err;
+ int i;
+
+ /* Sanity check SRQ size before proceeding */
+ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
+ init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
+ return ERR_PTR(-EINVAL);
+
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+ srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+ srq->msrq.max_gs = init_attr->attr.max_sge;
+
+ desc_size = max(32UL,
+ roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
+ srq->msrq.max_gs *
+ sizeof (struct mlx4_wqe_data_seg)));
+ srq->msrq.wqe_shift = ilog2(desc_size);
+
+ buf_size = srq->msrq.max * desc_size;
+
+ if (pd->uobject) {
+ struct mlx4_ib_create_srq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_srq;
+ }
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ buf_size, 0);
+ if (IS_ERR(srq->umem)) {
+ err = PTR_ERR(srq->umem);
+ goto err_srq;
+ }
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
+ ilog2(srq->umem->page_size), &srq->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+ ucmd.db_addr, &srq->db);
+ if (err)
+ goto err_mtt;
+ } else {
+ err = mlx4_ib_db_alloc(dev, &srq->db, 0);
+ if (err)
+ goto err_srq;
+
+ *srq->db.db = 0;
+
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+
+ srq->head = 0;
+ srq->tail = srq->msrq.max - 1;
+ srq->wqe_ctr = 0;
+
+ for (i = 0; i < srq->msrq.max; ++i) {
+ next = get_wqe(srq, i);
+ next->next_wqe_index =
+ cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+ }
+
+ err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
+ &srq->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ if (err)
+ goto err_mtt;
+
+ srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+ }
+
+ err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
+ srq->db.dma, &srq->msrq);
+ if (err)
+ goto err_wrid;
+
+ srq->msrq.event = mlx4_ib_srq_event;
+
+ if (pd->uobject)
+ if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
+ err = -EFAULT;
+ goto err_wrid;
+ }
+
+ init_attr->attr.max_wr = srq->msrq.max - 1;
+
+ return &srq->ibsrq;
+
+err_wrid:
+ if (pd->uobject)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ else
+ kfree(srq->wrid);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &srq->mtt);
+
+err_buf:
+ if (pd->uobject)
+ ib_umem_release(srq->umem);
+ else
+ mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+
+err_db:
+ if (!pd->uobject)
+ mlx4_ib_db_free(dev, &srq->db);
+
+err_srq:
+ kfree(srq);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit >= srq->msrq.max)
+ return -EINVAL;
+
+ mutex_lock(&srq->mutex);
+ ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
+ mutex_unlock(&srq->mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int mlx4_ib_destroy_srq(struct ib_srq *srq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(srq->device);
+ struct mlx4_ib_srq *msrq = to_msrq(srq);
+
+ mlx4_srq_free(dev->dev, &msrq->msrq);
+ mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
+
+ if (srq->uobject) {
+ mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+ ib_umem_release(msrq->umem);
+ } else {
+ kfree(msrq->wrid);
+ mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
+ &msrq->buf);
+ mlx4_ib_db_free(dev, &msrq->db);
+ }
+
+ kfree(msrq);
+
+ return 0;
+}
+
+void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
+{
+ struct mlx4_wqe_srq_next_seg *next;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ next = get_wqe(srq, srq->tail);
+ next->next_wqe_index = cpu_to_be16(wqe_index);
+ srq->tail = wqe_index;
+
+ spin_unlock(&srq->lock);
+}
+
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ struct mlx4_wqe_srq_next_seg *next;
+ struct mlx4_wqe_data_seg *scat;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ srq->wrid[srq->head] = wr->wr_id;
+
+ next = get_wqe(srq, srq->head);
+ srq->head = be16_to_cpu(next->next_wqe_index);
+ scat = (struct mlx4_wqe_data_seg *) (next + 1);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
+ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->msrq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+ }
+
+ if (likely(nreq)) {
+ srq->wqe_ctr += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *srq->db.db = cpu_to_be32(srq->wqe_ctr);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
new file mode 100644
index 00000000000..88c72d56368
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_IB_USER_H
+#define MLX4_IB_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX4_IB_UVERBS_ABI_VERSION 2
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx4_ib_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
+struct mlx4_ib_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_create_cq {
+ __u64 buf_addr;
+ __u64 db_addr;
+};
+
+struct mlx4_ib_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_resize_cq {
+ __u64 buf_addr;
+};
+
+struct mlx4_ib_create_srq {
+ __u64 buf_addr;
+ __u64 db_addr;
+};
+
+struct mlx4_ib_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_create_qp {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u8 log_sq_bb_count;
+ __u8 log_sq_stride;
+ __u8 reserved[6];
+};
+
+#endif /* MLX4_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 27caf3b0648..4b111a852ff 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -279,6 +279,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
(be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ header->grh.hop_limit = ah->av->hop_limit;
ib_get_cached_gid(&dev->ib_dev,
be32_to_cpu(ah->av->port_pd) >> 24,
ah->av->gid_index % dev->limits.gid_table_len,
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 71314460b11..38102520ffb 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -37,6 +37,7 @@
#include <linux/completion.h>
#include <linux/pci.h>
#include <linux/errno.h>
+#include <linux/sched.h>
#include <asm/io.h>
#include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index efd79ef109a..be6e1e03bda 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -37,6 +37,7 @@
*/
#include <linux/hardirq.h>
+#include <linux/sched.h>
#include <asm/io.h>
@@ -284,7 +285,7 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
{
struct mthca_cqe *cqe;
u32 prod_index;
- int nfreed = 0;
+ int i, nfreed = 0;
spin_lock_irq(&cq->lock);
@@ -321,6 +322,8 @@ void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
}
if (nfreed) {
+ for (i = 0; i < nfreed; ++i)
+ set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
wmb();
cq->cons_index += nfreed;
update_cons_index(dev, cq, nfreed);
@@ -726,11 +729,12 @@ repoll:
return err == 0 || err == -EAGAIN ? npolled : err;
}
-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
{
__be32 doorbell[2];
- doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ?
+ doorbell[0] = cpu_to_be32(((flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ?
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
MTHCA_TAVOR_CQ_DB_REQ_NOT) |
to_mcq(cq)->cqn);
@@ -743,7 +747,7 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify)
return 0;
}
-int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
+int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mthca_cq *cq = to_mcq(ibcq);
__be32 doorbell[2];
@@ -755,7 +759,8 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
doorbell[0] = ci;
doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
- (notify == IB_CQ_SOLICITED ? 1 : 2));
+ ((flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ? 1 : 2));
mthca_write_db_rec(doorbell, cq->arm_db);
@@ -766,7 +771,7 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
wmb();
doorbell[0] = cpu_to_be32((sn << 28) |
- (notify == IB_CQ_SOLICITED ?
+ ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
MTHCA_ARBEL_CQ_DB_REQ_NOT) |
cq->cqn);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index b7e42efaf43..9bae3cc6060 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -495,8 +495,8 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev);
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *entry);
-int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
-int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 773145e2994..aa563e61de6 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1250,12 +1250,14 @@ static void __mthca_remove_one(struct pci_dev *pdev)
int __mthca_restart_one(struct pci_dev *pdev)
{
struct mthca_dev *mdev;
+ int hca_type;
mdev = pci_get_drvdata(pdev);
if (!mdev)
return -ENODEV;
+ hca_type = mdev->hca_type;
__mthca_remove_one(pdev);
- return __mthca_init_one(pdev, mdev->hca_type);
+ return __mthca_init_one(pdev, hca_type);
}
static int __devinit mthca_init_one(struct pci_dev *pdev,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 48f7c65e9ae..e61f3e62698 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -36,6 +36,7 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
+#include <linux/sched.h>
#include <asm/page.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 594144145f4..a1ab06847b7 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -38,7 +38,6 @@
#define MTHCA_MEMFREE_H
#include <linux/list.h>
-#include <linux/pci.h>
#include <linux/mutex.h>
#define MTHCA_ICM_CHUNK_LEN \
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 47e6fd46d9c..6bcde1cb968 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <linux/mm.h>
@@ -663,6 +664,7 @@ static int mthca_destroy_qp(struct ib_qp *qp)
}
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@@ -907,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
+ mr->umem = NULL;
+
return &mr->ibmr;
}
@@ -1002,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
}
kfree(page_list);
+ mr->umem = NULL;
+
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
- int acc, struct ib_udata *udata)
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
@@ -1017,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
int err = 0;
int write_mtt_size;
- shift = ffs(region->page_size) - 1;
-
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err;
+ }
+
+ shift = ffs(mr->umem->page_size) - 1;
+
n = 0;
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
n += chunk->nents;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
- goto err;
+ goto err_umem;
}
pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1043,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, &region->chunk_list, list)
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- region->page_size * k;
+ mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
@@ -1070,8 +1082,8 @@ mtt_done:
if (err)
goto err_mtt;
- err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
- region->length, convert_access(acc), mr);
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+ convert_access(acc), mr);
if (err)
goto err_mtt;
@@ -1081,6 +1093,9 @@ mtt_done:
err_mtt:
mthca_free_mtt(dev, mr->mtt);
+err_umem:
+ ib_umem_release(mr->umem);
+
err:
kfree(mr);
return ERR_PTR(err);
@@ -1089,8 +1104,12 @@ err:
static int mthca_dereg_mr(struct ib_mr *mr)
{
struct mthca_mr *mmr = to_mmr(mr);
+
mthca_free_mr(to_mdev(mr->device), mmr);
+ if (mmr->umem)
+ ib_umem_release(mmr->umem);
kfree(mmr);
+
return 0;
}
@@ -1292,6 +1311,7 @@ int mthca_register_device(struct mthca_dev *dev)
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
+ dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dma_device = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac2e09..262616c8ebb 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
struct mthca_mr {
struct ib_mr ibmr;
+ struct ib_umem *umem;
struct mthca_mtt *mtt;
};
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 8fe6fee7a97..eef415b12b2 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -37,6 +37,7 @@
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <asm/io.h>
@@ -295,7 +296,7 @@ static int to_mthca_st(int transport)
}
}
-static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
+static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
int attr_mask)
{
if (attr_mask & IB_QP_PKEY_INDEX)
@@ -327,7 +328,7 @@ static void init_port(struct mthca_dev *dev, int port)
mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}
-static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
+static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
int attr_mask)
{
u8 dest_rd_atomic;
@@ -510,7 +511,7 @@ out:
return err;
}
-static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
struct mthca_qp_path *path, u8 port)
{
path->g_mylmc = ah->src_path_bits & 0x7f;
@@ -538,12 +539,12 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
return 0;
}
-int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
- struct ib_udata *udata)
+static int __mthca_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
- enum ib_qp_state cur_state, new_state;
struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
@@ -551,60 +552,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
u8 status;
int err = -EINVAL;
- mutex_lock(&qp->mutex);
-
- if (attr_mask & IB_QP_CUR_STATE) {
- cur_state = attr->cur_qp_state;
- } else {
- spin_lock_irq(&qp->sq.lock);
- spin_lock(&qp->rq.lock);
- cur_state = qp->state;
- spin_unlock(&qp->rq.lock);
- spin_unlock_irq(&qp->sq.lock);
- }
-
- new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
- mthca_dbg(dev, "Bad QP transition (transport %d) "
- "%d->%d with attr 0x%08x\n",
- qp->transport, cur_state, new_state,
- attr_mask);
- goto out;
- }
-
- if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- err = 0;
- goto out;
- }
-
- if ((attr_mask & IB_QP_PKEY_INDEX) &&
- attr->pkey_index >= dev->limits.pkey_table_len) {
- mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
- attr->pkey_index, dev->limits.pkey_table_len-1);
- goto out;
- }
-
- if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
- mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
- goto out;
- }
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
- mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
- attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
- goto out;
- }
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
- mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
- attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
- goto out;
- }
-
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
@@ -701,6 +648,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
}
+ if (ibqp->qp_type == IB_QPT_RC &&
+ cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;
+
+ if (mthca_is_memfree(dev))
+ qp_context->rlkey_arbel_sched_queue |= sched_queue;
+ else
+ qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
+
+ qp_param->opt_param_mask |=
+ cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
+ }
+
if (attr_mask & IB_QP_TIMEOUT) {
qp_context->pri_path.ackto = attr->timeout << 3;
qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
@@ -878,6 +838,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
out_mailbox:
mthca_free_mailbox(dev, mailbox);
+out:
+ return err;
+}
+
+static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
+static const int dummy_init_attr_mask[] = {
+ [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY),
+ [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_ACCESS_FLAGS),
+ [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+ [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
+ IB_QP_QKEY),
+};
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+
+ mutex_lock(&qp->mutex);
+ if (attr_mask & IB_QP_CUR_STATE) {
+ cur_state = attr->cur_qp_state;
+ } else {
+ spin_lock_irq(&qp->sq.lock);
+ spin_lock(&qp->rq.lock);
+ cur_state = qp->state;
+ spin_unlock(&qp->rq.lock);
+ spin_unlock_irq(&qp->sq.lock);
+ }
+
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ mthca_dbg(dev, "Bad QP transition (transport %d) "
+ "%d->%d with attr 0x%08x\n",
+ qp->transport, cur_state, new_state,
+ attr_mask);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+ attr->pkey_index, dev->limits.pkey_table_len-1);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+ mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+ mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+ attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+ mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+ attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+ err = __mthca_modify_qp(ibqp, &dummy_init_attr,
+ dummy_init_attr_mask[ibqp->qp_type],
+ IB_QPS_RESET, IB_QPS_INIT);
+ if (err)
+ goto out;
+ cur_state = IB_QPS_INIT;
+ }
+
+ err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
out:
mutex_unlock(&qp->mutex);
@@ -1849,6 +1901,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
dev->kar + MTHCA_RECEIVE_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ qp->rq.next_ind = ind;
qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
size0 = 0;
}
@@ -2231,10 +2284,10 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
struct mthca_next_seg *next;
/*
- * For SRQs, all WQEs generate a CQE, so we're always at the
- * end of the doorbell chain.
+ * For SRQs, all receive WQEs generate a CQE, so we're always
+ * at the end of the doorbell chain.
*/
- if (qp->ibqp.srq) {
+ if (qp->ibqp.srq && !is_send) {
*new_wqe = 0;
return;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 61974b0296c..b8f05a52667 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/sched.h>
#include <asm/io.h>