Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- drivers/infiniband/hw/Makefile | 12
-rw-r--r-- drivers/infiniband/hw/amso1100/c2.c | 4
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_ae.c | 2
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_intr.c | 3
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.c | 23
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_rnic.c | 3
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.c | 6
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.c | 1
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 19
-rw-r--r-- drivers/infiniband/hw/cxgb4/Kconfig | 6
-rw-r--r-- drivers/infiniband/hw/cxgb4/cm.c | 645
-rw-r--r-- drivers/infiniband/hw/cxgb4/cq.c | 64
-rw-r--r-- drivers/infiniband/hw/cxgb4/device.c | 311
-rw-r--r-- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 64
-rw-r--r-- drivers/infiniband/hw/cxgb4/mem.c | 65
-rw-r--r-- drivers/infiniband/hw/cxgb4/provider.c | 50
-rw-r--r-- drivers/infiniband/hw/cxgb4/qp.c | 217
-rw-r--r-- drivers/infiniband/hw/cxgb4/resource.c | 10
-rw-r--r-- drivers/infiniband/hw/cxgb4/t4.h | 79
-rw-r--r-- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 15
-rw-r--r-- drivers/infiniband/hw/cxgb4/user.h | 7
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h | 2
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_cq.c | 1
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mrmw.c | 257
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c | 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_diag.c | 68
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_dma.c | 43
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c | 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c | 39
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c | 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sdma.c | 4
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 7
-rw-r--r-- drivers/infiniband/hw/mlx4/Kconfig | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/ah.c | 42
-rw-r--r-- drivers/infiniband/hw/mlx4/alias_GUID.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/cm.c | 80
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c | 52
-rw-r--r-- drivers/infiniband/hw/mlx4/doorbell.c | 4
-rw-r--r-- drivers/infiniband/hw/mlx4/mad.c | 162
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 947
-rw-r--r-- drivers/infiniband/hw/mlx4/mcg.c | 5
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 46
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 39
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 546
-rw-r--r-- drivers/infiniband/hw/mlx4/srq.c | 7
-rw-r--r-- drivers/infiniband/hw/mlx4/sysfs.c | 118
-rw-r--r-- drivers/infiniband/hw/mlx5/Kconfig | 2
-rw-r--r-- drivers/infiniband/hw/mlx5/cq.c | 410
-rw-r--r-- drivers/infiniband/hw/mlx5/doorbell.c | 4
-rw-r--r-- drivers/infiniband/hw/mlx5/main.c | 72
-rw-r--r-- drivers/infiniband/hw/mlx5/mem.c | 80
-rw-r--r-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 35
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 461
-rw-r--r-- drivers/infiniband/hw/mlx5/qp.c | 806
-rw-r--r-- drivers/infiniband/hw/mlx5/srq.c | 26
-rw-r--r-- drivers/infiniband/hw/mlx5/user.h | 12
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_eq.c | 2
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c | 8
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c | 43
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_qp.c | 3
-rw-r--r-- drivers/infiniband/hw/nes/nes.c | 30
-rw-r--r-- drivers/infiniband/hw/nes/nes.h | 3
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c | 444
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.h | 15
-rw-r--r-- drivers/infiniband/hw/nes/nes_user.h | 5
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 263
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.h | 1
-rw-r--r-- drivers/infiniband/hw/ocrdma/Kconfig | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/Makefile | 2
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma.h | 171
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 7
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 8
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 328
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 7
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 230
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 263
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 616
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 54
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 250
-rw-r--r-- drivers/infiniband/hw/qib/qib.h | 12
-rw-r--r-- drivers/infiniband/hw/qib/qib_diag.c | 52
-rw-r--r-- drivers/infiniband/hw/qib/qib_dma.c | 21
-rw-r--r-- drivers/infiniband/hw/qib/qib_file_ops.c | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_fs.c | 8
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba6120.c | 11
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7220.c | 12
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7322.c | 53
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c | 104
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.c | 46
-rw-r--r-- drivers/infiniband/hw/qib/qib_mad.h | 14
-rw-r--r-- drivers/infiniband/hw/qib/qib_mr.c | 14
-rw-r--r-- drivers/infiniband/hw/qib/qib_pcie.c | 181
-rw-r--r-- drivers/infiniband/hw/qib/qib_qp.c | 5
-rw-r--r-- drivers/infiniband/hw/qib/qib_rc.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_ruc.c | 1
-rw-r--r-- drivers/infiniband/hw/qib/qib_ud.c | 15
-rw-r--r-- drivers/infiniband/hw/qib/qib_user_sdma.c | 142
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.c | 8
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.h | 26
-rw-r--r-- drivers/infiniband/hw/usnic/Kconfig | 10
-rw-r--r-- drivers/infiniband/hw/usnic/Makefile | 15
-rw-r--r-- drivers/infiniband/hw/usnic/usnic.h | 29
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_abi.h | 73
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h | 27
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_common_util.h | 68
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_debugfs.c | 154
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_debugfs.h | 29
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_fwd.c | 350
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_fwd.h | 113
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib.h | 118
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_main.c | 682
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 761
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 117
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 341
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_sysfs.h | 29
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 768
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 72
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_log.h | 58
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_transport.c | 202
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_transport.h | 51
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom.c | 604
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom.h | 80
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 254
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h | 73
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_vnic.c | 467
-rw-r--r-- drivers/infiniband/hw/usnic/usnic_vnic.h | 103
126 files changed, 12688 insertions, 2442 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 00000000000..e900b03531a
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
+obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
+obj-$(CONFIG_INFINIBAND_QIB) += qib/
+obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
+obj-$(CONFIG_INFINIBAND_NES) += nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index d53cf519f42..00400c352c1 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -1082,6 +1082,7 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
/* Initialize network device */
if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+ ret = -ENOMEM;
iounmap(mmio_regs);
goto bail4;
}
@@ -1151,7 +1152,8 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
goto bail10;
}
- if (c2_register_device(c2dev))
+ ret = c2_register_device(c2dev);
+ if (ret)
goto bail10;
return 0;
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index d5d1929753e..cedda25232b 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state)
return "C2_QP_STATE_ERROR";
default:
return "<invalid QP state>";
- };
+ }
}
void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 8951db4ae29..3a17d9b36db 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
* We should never get here, as the adapter should
* never send us a reply that we're not expecting.
*/
- vq_repbuf_free(c2dev, host_msg);
+ if (reply_msg != NULL)
+ vq_repbuf_free(c2dev, host_msg);
pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
return;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 07eb3a8067d..8af33cf1fc4 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 *pages;
u64 kva = 0;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;
@@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(c2mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = c2mr->umem->nmap;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
@@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] =
- sg_dma_address(&chunk->page_list[j]) +
- (c2mr->umem->page_size * k);
- }
+ for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] =
+ sg_dma_address(sg) +
+ (c2mr->umem->page_size * k);
}
}
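
Note: this hunk, and the iwch_provider.c hunk below, convert the old ib_umem chunk-list walk to the single-scatterlist layout. A minimal standalone sketch of the new walk, assuming the 3.15-era struct ib_umem (sg_head, nmap, page_size); emit_page() is a hypothetical stand-in for the driver-specific PBL fill:

#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>

static void walk_umem_pages(struct ib_umem *umem,
			    void (*emit_page)(u64 dma_addr))
{
	struct scatterlist *sg;
	int shift = ffs(umem->page_size) - 1;	/* log2 of the page size */
	int entry, k, len;

	/* umem->nmap DMA-mapped scatterlist entries replace the chunk list */
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> shift;	/* pages in this entry */
		for (k = 0; k < len; ++k)
			emit_page(sg_dma_address(sg) +
				  (u64)umem->page_size * k);
	}
}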
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index b7c98699005..d2a6d961344 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -576,7 +576,8 @@ int c2_rnic_init(struct c2_dev *c2dev)
goto bail4;
/* Initialize the cached adapter limits */
- if (c2_rnic_query(c2dev, &c2dev->props))
+ err = c2_rnic_query(c2dev, &c2dev->props);
+ if (err)
goto bail5;
/* Initialize the PD pool */
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index c3f5aca4ef0..de1c61b417d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -735,14 +735,12 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 095bb046e2c..cb78b1e9bcd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
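
Note: the memset added here matters because skb_put() only advances the skb tail pointer; the bytes it exposes are uninitialized, so reserved fields in the CPL would otherwise carry stale memory to the hardware. A hedged sketch of the idiom, where build_abort_req() is a hypothetical wrapper around the macros used above:

static void build_abort_req(struct sk_buff *skb, unsigned int hwtid)
{
	struct cpl_abort_req *req;

	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
	memset(req, 0, sizeof(*req));	/* clear reserved/untouched fields */
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
	req->wr.wr_lo = htonl(V_WR_TID(hwtid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, hwtid));
}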
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d2283837d45..811b24a539c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
-
+ struct scatterlist *sg;
PDBG("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
@@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
+ n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
@@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
@@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = 0;
}
}
- }
+ }
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index d4e8983fba5..23f38cf2c5c 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,10 +1,10 @@
config INFINIBAND_CXGB4
- tristate "Chelsio T4 RDMA Driver"
+ tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
select GENERIC_ALLOCATOR
---help---
- This is an iWARP/RDMA driver for the Chelsio T4 1GbE and
- 10GbE adapters.
+ This is an iWARP/RDMA driver for the Chelsio T4 and T5
+ 1GbE, 10GbE adapters and T5 40GbE adapter.
For general information about Chelsio and our products, visit
our website at <http://www.chelsio.com>.
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 12fef76c791..768a0fb67dd 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+
#include "iw_cxgb4.h"
static char *states[] = {
@@ -98,9 +100,9 @@ int c4iw_debug;
module_param(c4iw_debug, int, 0644);
MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
-static int peer2peer;
+static int peer2peer = 1;
module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
module_param(p2p_type, int, 0644);
@@ -173,12 +175,15 @@ static void start_ep_timer(struct c4iw_ep *ep)
add_timer(&ep->timer);
}
-static void stop_ep_timer(struct c4iw_ep *ep)
+static int stop_ep_timer(struct c4iw_ep *ep)
{
PDBG("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
- if (!test_and_set_bit(TIMEOUT, &ep->com.flags))
+ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
+ return 0;
+ }
+ return 1;
}
static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
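
Note: the new return value of stop_ep_timer() encodes who won the race between a synchronous stop and an already-fired timer. test_and_set_bit() is atomic, so exactly one path sees TIMEOUT clear and drops the timer's endpoint reference. A sketch of the caller-side contract, assuming the conventions above:

	/* 0: we stopped the timer and dropped its reference;
	 * 1: the timer fired first, so process_timeout() owns the
	 *    teardown and the caller must back off. */
	if (stop_ep_timer(ep))
		return 0;	/* e.g. ignore a late MPA reply */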
@@ -229,12 +234,16 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
static void set_emss(struct c4iw_ep *ep, u16 opt)
{
- ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
+ ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+ sizeof(struct iphdr) - sizeof(struct tcphdr);
ep->mss = ep->emss;
if (GET_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
+ if (ep->emss & 7)
+ PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
ep->mss, ep->emss);
}
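
Worked numbers for the new set_emss() arithmetic, illustrative only and assuming plain 20-byte IP and TCP headers:

#include <linux/ip.h>
#include <linux/tcp.h>

static unsigned int example_emss(unsigned int mtu, int timestamps)
{
	unsigned int emss = mtu - sizeof(struct iphdr)
				- sizeof(struct tcphdr); /* 1500 -> 1460 */

	if (timestamps)
		emss -= 12;	/* 1460 -> 1448 = 181 * 8, aligned */
	if (emss < 128)
		emss = 128;	/* hardware floor, as above */
	return emss;		/* plain 1460 has 1460 & 7 == 4, so the
				 * new misalignment warning fires */
}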
@@ -291,6 +300,12 @@ void _c4iw_free_ep(struct kref *kref)
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
}
+ if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
+ print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
+ iwpm_remove_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr);
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ }
kfree(ep);
}
@@ -338,10 +353,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
static struct net_device *get_real_dev(struct net_device *egress_dev)
{
- struct net_device *phys_dev = egress_dev;
- if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
- phys_dev = vlan_dev_real_dev(egress_dev);
- return phys_dev;
+ return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
@@ -400,7 +412,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
n = dst_neigh_lookup(&rt->dst, &peer_ip);
if (!n)
return NULL;
- if (!our_interface(dev, n->dev)) {
+ if (!our_interface(dev, n->dev) &&
+ !(n->dev->flags & IFF_LOOPBACK)) {
dst_release(&rt->dst);
return NULL;
}
@@ -419,8 +432,17 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb)
*/
static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
+ struct c4iw_ep *ep = handle;
+
printk(KERN_ERR MOD "ARP failure duing connect\n");
kfree_skb(skb);
+ connect_reply_upcall(ep, -EHOSTUNREACH);
+ state_set(&ep->com, DEAD);
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_put_ep(&ep->com);
}
/*
@@ -464,7 +486,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
- flowc->mnemval[6].val = cpu_to_be32(snd_win);
+ flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
flowc->mnemval[7].val = cpu_to_be32(ep->emss);
/* Pad WR to 16 byte boundary */
@@ -524,48 +546,47 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
-#define VLAN_NONE 0xfff
-#define FILTER_SEL_VLAN_NONE 0xffff
-#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */
-#define FILTER_SEL_WIDTH_VIN_P_FC \
- (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/
-#define FILTER_SEL_WIDTH_TAG_P_FC \
- (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */
-#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC)
+/*
+ * c4iw_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void c4iw_form_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
+}
-static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,
- struct l2t_entry *l2t)
+/*
+ * c4iw_form_reg_msg - Form a port mapper message with dev info
+ */
+static void c4iw_form_reg_msg(struct c4iw_dev *dev,
+ struct iwpm_dev_data *pm_msg)
{
- unsigned int ntuple = 0;
- u32 viid;
+ memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
+ memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
+ IWPM_IFNAME_SIZE);
+}
- switch (dev->rdev.lldi.filt_mode) {
+static void c4iw_record_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
+ sizeof(ep->com.mapped_remote_addr));
+}
- /* default filter mode */
- case HW_TPL_FR_MT_PR_IV_P_FC:
- if (l2t->vlan == VLAN_NONE)
- ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
- else {
- ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC;
- ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC;
- }
- ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
- FILTER_SEL_WIDTH_VLD_TAG_P_FC;
- break;
- case HW_TPL_FR_MT_PR_OV_P_FC: {
- viid = cxgb4_port_viid(l2t->neigh->dev);
-
- ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC;
- ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC;
- ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC;
- ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
- FILTER_SEL_WIDTH_VLD_TAG_P_FC;
- break;
- }
- default:
- break;
- }
- return ntuple;
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+ unsigned int *idx, int use_ts)
+{
+ unsigned short hdr_size = sizeof(struct iphdr) +
+ sizeof(struct tcphdr) +
+ (use_ts ? 12 : 0);
+ unsigned short data_size = mtu - hdr_size;
+
+ cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
}
static int send_connect(struct c4iw_ep *ep)
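
Note: best_mtu() above replaces the plain cxgb4_best_mtu() lookup. Instead of simply taking the largest table MTU under the path MTU, it asks the LLD, via the cxgb4_best_aligned_mtu() export, for an entry whose TCP payload stays 8-byte aligned, preferring aligned payloads and falling back to the best fit otherwise. A worked call with illustrative numbers:

	unsigned int idx;

	/* mtu = 1500 with timestamps: hdr_size = 20 + 20 + 12 = 52,
	 * data_size = 1448 = 181 * 8, so the 1500-byte entry is an
	 * aligned choice and its index is returned in idx. */
	best_mtu(ep->com.dev->rdev.lldi.mtus, 1500, &idx, 1);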
@@ -586,10 +607,15 @@ static int send_connect(struct c4iw_ep *ep)
int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
sizeof(struct cpl_act_open_req6) :
sizeof(struct cpl_t5_act_open_req6);
- struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
- struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
- struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
- struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ struct sockaddr_in *la = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *ra = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
+ int win;
wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
roundup(sizev4, 16) :
@@ -605,8 +631,18 @@ static int send_connect(struct c4iw_ep *ep)
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
DELACK(1) |
@@ -617,7 +653,7 @@ static int send_connect(struct c4iw_ep *ep)
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win>>10);
+ RCV_BUFSIZ(win);
opt2 = RX_CHANNEL(0) |
CCTRL_ECN(enable_ecn) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -627,7 +663,11 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= SACK_EN(1);
if (wscale && enable_tcp_window_scaling)
opt2 |= WND_SCALE_EN(1);
- t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ }
+ t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
if (ep->com.remote_addr.ss_family == AF_INET) {
@@ -641,8 +681,9 @@ static int send_connect(struct c4iw_ep *ep)
req->local_ip = la->sin_addr.s_addr;
req->peer_ip = ra->sin_addr.s_addr;
req->opt0 = cpu_to_be64(opt0);
- req->params = cpu_to_be32(select_ntuple(ep->com.dev,
- ep->dst, ep->l2t));
+ req->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
req->opt2 = cpu_to_be32(opt2);
} else {
req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
@@ -662,12 +703,19 @@ static int send_connect(struct c4iw_ep *ep)
req6->peer_ip_lo = *((__be64 *)
(ra6->sin6_addr.s6_addr + 8));
req6->opt0 = cpu_to_be64(opt0);
- req6->params = cpu_to_be32(
- select_ntuple(ep->com.dev, ep->dst,
- ep->l2t));
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
}
} else {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ if (peer2peer)
+ isn += 4;
+
if (ep->com.remote_addr.ss_family == AF_INET) {
t5_req = (struct cpl_t5_act_open_req *)
skb_put(skb, wrlen);
@@ -681,8 +729,12 @@ static int send_connect(struct c4iw_ep *ep)
t5_req->peer_ip = ra->sin_addr.s_addr;
t5_req->opt0 = cpu_to_be64(opt0);
t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
- select_ntuple(ep->com.dev,
- ep->dst, ep->l2t)));
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t)));
+ t5_req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req->rsvd));
t5_req->opt2 = cpu_to_be32(opt2);
} else {
t5_req6 = (struct cpl_t5_act_open_req6 *)
@@ -703,7 +755,12 @@ static int send_connect(struct c4iw_ep *ep)
(ra6->sin6_addr.s6_addr + 8));
t5_req6->opt0 = cpu_to_be64(opt0);
t5_req6->params = (__force __be64)cpu_to_be32(
- select_ntuple(ep->com.dev, ep->dst, ep->l2t));
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ t5_req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req6->rsvd));
t5_req6->opt2 = cpu_to_be32(opt2);
}
}
@@ -799,8 +856,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
ep->mpa_skb = skb;
c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
start_ep_timer(ep);
- state_set(&ep->com, MPA_REQ_SENT);
+ __state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
+ ep->snd_seq += mpalen;
return;
}
@@ -880,6 +938,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
+ ep->snd_seq += mpalen;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
@@ -963,7 +1022,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
ep->mpa_skb = skb;
- state_set(&ep->com, MPA_REP_SENT);
+ __state_set(&ep->com, MPA_REP_SENT);
+ ep->snd_seq += mpalen;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
@@ -980,6 +1040,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
/* setup the hwtid for this connection */
@@ -1003,17 +1064,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
send_mpa_req(ep, skb, 1);
else
send_mpa_req(ep, skb, mpa_rev);
-
+ mutex_unlock(&ep->com.mutex);
return 0;
}
-static void close_complete_upcall(struct c4iw_ep *ep)
+static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
+ event.status = status;
if (ep->com.cm_id) {
PDBG("close complete delivered ep %p cm_id %p tid %u\n",
ep, ep->com.cm_id, ep->hwtid);
@@ -1027,8 +1089,7 @@ static void close_complete_upcall(struct c4iw_ep *ep)
static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- close_complete_upcall(ep);
- state_set(&ep->com, ABORTING);
+ __state_set(&ep->com, ABORTING);
set_bit(ABORT_CONN, &ep->com.history);
return send_abort(ep, skb, gfp);
}
@@ -1106,9 +1167,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
-static void connect_request_upcall(struct c4iw_ep *ep)
+static int connect_request_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
+ int ret;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
@@ -1133,15 +1195,14 @@ static void connect_request_upcall(struct c4iw_ep *ep)
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
- if (state_read(&ep->parent_ep->com) != DEAD) {
- c4iw_get_ep(&ep->com);
- ep->parent_ep->com.cm_id->event_handler(
- ep->parent_ep->com.cm_id,
- &event);
- }
+ c4iw_get_ep(&ep->com);
+ ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+ &event);
+ if (ret)
+ c4iw_put_ep(&ep->com);
set_bit(CONNREQ_UPCALL, &ep->com.history);
c4iw_put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
+ return ret;
}
static void established_upcall(struct c4iw_ep *ep)
@@ -1173,6 +1234,14 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
return 0;
}
+ /*
+ * If we couldn't specify the entire rcv window at connection setup
+ * due to the limit in the number of bits in the RCV_BUFSIZ field,
+ * then add the overage in to the credits returned.
+ */
+ if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024)
+ credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024;
+
req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
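
Note: the RCV_BUFSIZ clamp in send_connect() and the credit top-up here are two halves of one mechanism. Worked numbers, assuming RCV_BUFSIZ_MASK is the 10-bit field mask 0x3ff counting 1KB units:

	u32 win = ep->rcv_win >> 10;	/* bytes -> 1KB units */

	if (win > RCV_BUFSIZ_MASK)	/* opt0 can carry at most 1023 */
		win = RCV_BUFSIZ_MASK;
	/* e.g. rcv_win = 2 MB: win = 2048, clamped to 1023; the
	 * remaining 2097152 - 1023 * 1024 = 1049600 bytes are handed
	 * back as extra credits in the first CPL_RX_DATA_ACK above. */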
@@ -1186,7 +1255,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
return credits;
}
-static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
struct mpa_v2_conn_params *mpa_v2_params;
@@ -1196,17 +1265,17 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
enum c4iw_qp_attr_mask mask;
int err;
+ int disconnect = 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
*/
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_SENT)
- return;
+ if (stop_ep_timer(ep))
+ return 0;
/*
* If we get more than the supported amount of private data
@@ -1228,7 +1297,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* if we don't even have the mpa message, then bail.
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
+ return 0;
mpa = (struct mpa_message *) ep->mpa_pkt;
/* Validate MPA header. */
@@ -1268,7 +1337,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* We'll continue process when more data arrives.
*/
if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
+ return 0;
if (mpa->flags & MPA_REJECT) {
err = -ECONNREFUSED;
@@ -1280,7 +1349,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* start reply message including private data. And
* the MPA header is valid.
*/
- state_set(&ep->com, FPDU_MODE);
+ __state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1370,9 +1439,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
+ disconnect = 1;
goto out;
}
@@ -1388,18 +1459,20 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
+ disconnect = 1;
goto out;
}
goto out;
err:
- state_set(&ep->com, ABORTING);
+ __state_set(&ep->com, ABORTING);
send_abort(ep, skb, GFP_KERNEL);
out:
connect_reply_upcall(ep, err);
- return;
+ return disconnect;
}
static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
@@ -1410,15 +1483,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) != MPA_REQ_WAIT)
- return;
-
/*
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1440,7 +1510,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
return;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
- stop_ep_timer(ep);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
@@ -1449,13 +1518,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (mpa->revision > mpa_rev) {
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1466,7 +1535,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* Fail if there's too much private data.
*/
if (plen > MPA_MAX_PRIVATE_DATA) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1475,7 +1544,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* If plen does not account for pkt size
*/
if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1532,10 +1601,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
ep->mpa_attr.p2p_type);
- state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- connect_request_upcall(ep);
+ /*
+ * If the endpoint timer already expired, then we ignore
+ * the start request. process_timeout() will abort
+ * the connection.
+ */
+ if (!stop_ep_timer(ep)) {
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock(&ep->parent_ep->com.mutex);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ abort_connection(ep, skb, GFP_KERNEL);
+ } else {
+ abort_connection(ep, skb, GFP_KERNEL);
+ }
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ }
return;
}
@@ -1547,19 +1630,23 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(hdr);
struct tid_info *t = dev->rdev.lldi.tids;
__u8 status = hdr->status;
+ int disconnect = 0;
ep = lookup_tid(t, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
+ mutex_lock(&ep->com.mutex);
/* update RX credits */
update_rx_credits(ep, dlen);
- switch (state_read(&ep->com)) {
+ switch (ep->com.state) {
case MPA_REQ_SENT:
ep->rcv_seq += dlen;
- process_mpa_reply(ep, skb);
+ disconnect = process_mpa_reply(ep, skb);
break;
case MPA_REQ_WAIT:
ep->rcv_seq += dlen;
@@ -1572,15 +1659,19 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
pr_err("%s Unexpected streaming data." \
" qpid %u ep %p state %d tid %u status %d\n",
__func__, ep->com.qp->wq.sq.qid, ep,
- state_read(&ep->com), ep->hwtid, status);
+ ep->com.state, ep->hwtid, status);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ disconnect = 1;
break;
}
default:
break;
}
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
return 0;
}
@@ -1624,18 +1715,20 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
unsigned int mtu_idx;
int wscale;
struct sockaddr_in *sin;
+ int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
- req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst,
+ req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
- sin = (struct sockaddr_in *)&ep->com.local_addr;
+ sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
req->le.lport = sin->sin_port;
req->le.u.ipv4.lip = sin->sin_addr.s_addr;
- sin = (struct sockaddr_in *)&ep->com.remote_addr;
+ sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
req->le.pport = sin->sin_port;
req->le.u.ipv4.pip = sin->sin_addr.s_addr;
req->tcb.t_state_to_astid =
@@ -1645,8 +1738,18 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
(nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
@@ -1658,7 +1761,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win >> 10));
+ RCV_BUFSIZ(win));
req->tcb.opt2 = (__force __be32) (PACE(1) |
TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
RX_CHANNEL(0) |
@@ -1686,6 +1789,22 @@ static inline int act_open_has_tid(int status)
status != CPL_ERR_ARP_MISS;
}
+/* Returns whether a CPL status conveys negative advice.
+ */
+static int is_neg_adv(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE ||
+ status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
+static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
+{
+ ep->snd_win = snd_win;
+ ep->rcv_win = rcv_win;
+ PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+}
+
#define ACT_OPEN_RETRY_COUNT 2
static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
@@ -1734,6 +1853,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
ep->ctrlq_idx = cxgb4_port_idx(pdev);
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
dev_put(pdev);
} else {
pdev = get_real_dev(n->dev);
@@ -1742,16 +1862,17 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
- ep->tx_chan = cxgb4_port_chan(n->dev);
- ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
- ep->txq_idx = cxgb4_port_idx(n->dev) * step;
- ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
step = cdev->rdev.lldi.nrxq /
cdev->rdev.lldi.nchan;
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
- cxgb4_port_idx(n->dev) * step];
+ cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
if (clear_mpa_v1) {
ep->retry_with_mpa_v1 = 0;
@@ -1866,15 +1987,15 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct sockaddr_in6 *ra6;
ep = lookup_atid(t, atid);
- la = (struct sockaddr_in *)&ep->com.local_addr;
- ra = (struct sockaddr_in *)&ep->com.remote_addr;
- la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
- ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
- if (status == CPL_ERR_RTX_NEG_ADVICE) {
+ if (is_neg_adv(status)) {
printk(KERN_WARNING MOD "Connection problems for atid %u\n",
atid);
return 0;
@@ -1982,13 +2103,36 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
u64 opt0;
u32 opt2;
int wscale;
+ struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
+ int win;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
+
skb_get(skb);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ rpl = cplhdr(skb);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
DELACK(1) |
@@ -1999,7 +2143,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos >> 2) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win>>10);
+ RCV_BUFSIZ(win);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -2018,11 +2162,19 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN(1);
}
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ rpl5 = (void *)rpl;
+ memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+ if (peer2peer)
+ isn += 4;
+ rpl5->iss = cpu_to_be32(isn);
+ PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ }
- rpl = cplhdr(skb);
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -2037,7 +2189,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
- skb_get(skb);
release_tid(&dev->rdev, hwtid, skb);
return;
}
@@ -2087,6 +2238,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
int err;
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
+ unsigned short hdrs;
parent_ep = lookup_stid(t, stid);
if (!parent_ep) {
@@ -2144,8 +2296,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- if (peer_mss && child_ep->mtu > (peer_mss + 40))
- child_ep->mtu = peer_mss + 40;
+ hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+ if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+ child_ep->mtu = peer_mss + hdrs;
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
@@ -2279,13 +2433,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
disconnect = 0;
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
- close_complete_upcall(ep);
+ close_complete_upcall(ep, 0);
__state_set(&ep->com, DEAD);
release = 1;
disconnect = 0;
@@ -2304,15 +2458,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -2326,7 +2471,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(req);
ep = lookup_tid(t, tid);
- if (is_neg_adv_abort(req->status)) {
+ if (is_neg_adv(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
ep->hwtid);
return 0;
@@ -2348,10 +2493,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case CONNECTING:
break;
case MPA_REQ_WAIT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
break;
case MPA_REQ_SENT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
@@ -2456,7 +2601,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
__state_set(&ep->com, MORIBUND);
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if ((ep->com.cm_id) && (ep->com.qp)) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp,
@@ -2464,7 +2609,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- close_complete_upcall(ep);
+ close_complete_upcall(ep, 0);
__state_set(&ep->com, DEAD);
release = 1;
break;
@@ -2539,22 +2684,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
- int err;
+ int err = 0;
+ int disconnect = 0;
struct c4iw_ep *ep = to_ep(cm_id);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return -ECONNRESET;
}
set_bit(ULP_REJECT, &ep->com.history);
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
if (mpa_rev == 0)
abort_connection(ep, NULL, GFP_KERNEL);
else {
err = send_mpa_reject(ep, pdata, pdata_len);
- err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ disconnect = 1;
}
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
@@ -2569,12 +2720,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
+
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
err = -ECONNRESET;
goto err;
}
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
BUG_ON(!qp);
set_bit(ULP_ACCEPT, &ep->com.history);
@@ -2643,14 +2796,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (err)
goto err1;
- state_set(&ep->com, FPDU_MODE);
+ __state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return 0;
err1:
ep->com.cm_id = NULL;
cm_id->rem_ref(cm_id);
err:
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return err;
}
@@ -2721,13 +2876,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
struct c4iw_ep *ep;
int err = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
- struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
- struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
- &cm_id->remote_addr;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
__u8 *ra;
int iptype;
+ int iwpm_err = 0;
if ((conn_param->ord > c4iw_max_read_depth) ||
(conn_param->ird > c4iw_max_read_depth)) {
@@ -2758,7 +2915,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
- goto fail2;
+ goto fail1;
}
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -2771,10 +2928,50 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ep->atid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
err = -ENOMEM;
- goto fail2;
+ goto fail1;
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
+
+ /* No port mapper available, go with the specified peer information */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.mapped_remote_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ c4iw_form_pm_msg(ep, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ c4iw_record_pm_msg(ep, &pm_msg);
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ print_addr(&ep->com, __func__, "add_query/create_mapinfo");
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
+
+ laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+
if (cm_id->remote_addr.ss_family == AF_INET) {
iptype = 4;
ra = (__u8 *)&raddr->sin_addr;
@@ -2785,7 +2982,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail1;
}
/* find a route */
@@ -2805,7 +3002,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail1;
}
/* find a route */
@@ -2821,13 +3018,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
- goto fail3;
+ goto fail2;
}
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
- goto fail4;
+ goto fail3;
}
PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -2836,10 +3033,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
state_set(&ep->com, CONNECTING);
ep->tos = 0;
- memcpy(&ep->com.local_addr, &cm_id->local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
- sizeof(ep->com.remote_addr));
/* send connect request to rnic */
err = send_connect(ep);
@@ -2847,12 +3040,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto out;
cxgb4_l2t_release(ep->l2t);
-fail4:
- dst_release(ep->dst);
fail3:
+ dst_release(ep->dst);
+fail2:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail2:
+fail1:
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
out:
@@ -2862,7 +3055,8 @@ out:
static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
{
int err;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
@@ -2883,7 +3077,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
{
int err;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
if (dev->rdev.lldi.enable_fw_ofld_conn) {
do {
@@ -2918,6 +3113,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
int err = 0;
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
struct c4iw_listen_ep *ep;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
might_sleep();
@@ -2938,7 +3136,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
/*
* Allocate a server TID.
*/
- if (dev->rdev.lldi.enable_fw_ofld_conn)
+ if (dev->rdev.lldi.enable_fw_ofld_conn &&
+ ep->com.local_addr.ss_family == AF_INET)
ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
cm_id->local_addr.ss_family, ep);
else
@@ -2951,6 +3150,37 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
goto fail2;
}
insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+
+ /* No port mapper available, go with the specified info */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ memcpy(&ep->com.mapped_local_addr,
+ &pm_msg.mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ err = -ENOMEM;
+ goto fail3;
+ }
+ print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
state_set(&ep->com, LISTEN);
if (ep->com.local_addr.ss_family == AF_INET)
err = create_server4(dev, ep);
@@ -2960,6 +3190,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = ep;
goto out;
}
+
+fail3:
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
@@ -3018,7 +3250,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
rdev = &ep->com.dev->rdev;
if (c4iw_fatal_error(rdev)) {
fatal = 1;
- close_complete_upcall(ep);
+ close_complete_upcall(ep, -EIO);
ep->com.state = DEAD;
}
switch (ep->com.state) {
@@ -3040,7 +3272,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
close = 1;
if (abrupt) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
@@ -3060,7 +3292,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (close) {
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
- close_complete_upcall(ep);
+ close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep, NULL, gfp);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3241,6 +3473,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
struct sk_buff *req_skb;
struct fw_ofld_connection_wr *req;
struct cpl_pass_accept_req *cpl = cplhdr(skb);
+ int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
@@ -3277,7 +3510,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
req->cookie = (unsigned long)skb;
set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
- cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+ ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+ if (ret < 0) {
+ pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
+ ret);
+ kfree_skb(skb);
+ kfree_skb(req_skb);
+ }
}
/*
@@ -3323,9 +3562,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
/*
* Calculate the server TID from the filter hit index in the cpl_rx_pkt.
*/
- stid = (__force int) cpu_to_be32((__force u32) rss->hash_val)
- - dev->rdev.lldi.tids->sftid_base
- + dev->rdev.lldi.tids->nstids;
+ stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
if (!lep) {
@@ -3386,6 +3623,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
pi = (struct port_info *)netdev_priv(pdev);
tx_chan = cxgb4_port_chan(pdev);
}
+ neigh_release(neigh);
if (!e) {
pr_err("%s - failed to allocate l2t entry!\n",
__func__);
@@ -3397,7 +3635,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
window = (__force u16) htons((__force u16)tcph->window);
/* Calculate filter portion for LE region. */
- filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e));
+ filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
+ dev->rdev.lldi.ports[0],
+ e));
/*
* Synthesize the cpl_pass_accept_req. We have everything except the
@@ -3464,15 +3704,26 @@ static void process_timeout(struct c4iw_ep *ep)
&attrs, 1);
}
__state_set(&ep->com, ABORTING);
+ close_complete_upcall(ep, -ETIMEDOUT);
+ break;
+ case ABORTING:
+ case DEAD:
+
+ /*
+ * These states are expected if the ep timed out at the same
+ * time as another thread was calling stop_ep_timer().
+ * So we silently do nothing for these states.
+ */
+ abort = 0;
break;
default:
WARN(1, "%s unexpected state ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- mutex_unlock(&ep->com.mutex);
if (abort)
abort_connection(ep, NULL, GFP_KERNEL);
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
}
@@ -3486,6 +3737,8 @@ static void process_timedout_eps(void)
tmp = timeout_list.next;
list_del(tmp);
+ tmp->next = NULL;
+ tmp->prev = NULL;
spin_unlock_irq(&timeout_lock);
ep = list_entry(tmp, struct c4iw_ep, entry);
process_timeout(ep);
@@ -3502,6 +3755,7 @@ static void process_work(struct work_struct *work)
unsigned int opcode;
int ret;
+ process_timedout_eps();
while ((skb = skb_dequeue(&rxq))) {
rpl = cplhdr(skb);
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
@@ -3511,8 +3765,8 @@ static void process_work(struct work_struct *work)
ret = work_handlers[opcode](dev, skb);
if (!ret)
kfree_skb(skb);
+ process_timedout_eps();
}
- process_timedout_eps();
}
static DECLARE_WORK(skb_work, process_work);
@@ -3524,8 +3778,13 @@ static void ep_timeout(unsigned long arg)
spin_lock(&timeout_lock);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
- list_add_tail(&ep->entry, &timeout_list);
- kickit = 1;
+ /*
+ * Only insert if it is not already on the list.
+ */
+ if (!ep->entry.next) {
+ list_add_tail(&ep->entry, &timeout_list);
+ kickit = 1;
+ }
}
spin_unlock(&timeout_lock);
if (kickit)
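The guard above relies on a common list_head idiom: a zero-initialized entry has next == NULL, so that pointer doubles as an "already queued" flag, and process_timedout_eps() re-arms it by nulling both pointers after list_del(). A self-contained sketch of the idiom, with hypothetical demo_* names; it assumes the embedding object starts zeroed (e.g. from kzalloc()):

#include <linux/list.h>
#include <linux/spinlock.h>

static LIST_HEAD(demo_timeout_list);
static DEFINE_SPINLOCK(demo_timeout_lock);

struct demo_item {
	struct list_head entry;		/* zeroed at init: next == NULL */
};

static bool demo_queue_once(struct demo_item *it)
{
	bool queued = false;

	spin_lock(&demo_timeout_lock);
	if (!it->entry.next) {		/* sentinel: not on any list */
		list_add_tail(&it->entry, &demo_timeout_list);
		queued = true;
	}
	spin_unlock(&demo_timeout_lock);
	return queued;
}

static void demo_dequeue(struct demo_item *it)
{
	spin_lock(&demo_timeout_lock);
	list_del(&it->entry);
	it->entry.next = NULL;		/* re-arm the sentinel */
	it->entry.prev = NULL;
	spin_unlock(&demo_timeout_lock);
}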
@@ -3607,7 +3866,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
kfree_skb(skb);
return 0;
}
- if (is_neg_adv_abort(req->status)) {
+ if (is_neg_adv(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
ep->hwtid);
kfree_skb(skb);
@@ -3666,7 +3925,7 @@ int __init c4iw_cm_init(void)
return 0;
}
-void __exit c4iw_cm_term(void)
+void c4iw_cm_term(void)
{
WARN_ON(!list_empty(&timeout_list));
flush_workqueue(workq);
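Dropping __exit here is not cosmetic: the init-path cleanup added in device.c below calls c4iw_cm_term() when iwpm_init() fails, and a function placed in .exit.text may be discarded entirely for built-in code, so it must not be reachable from module init. A small sketch of the rule, with hypothetical demo_* names:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>

static int demo_setup_a(void) { return 0; }
static void demo_teardown_a(void) { }	/* no __exit: also used on init failure */
static int demo_setup_b(void) { return -ENODEV; }

static int __init demo_init(void)
{
	int err = demo_setup_a();

	if (err)
		return err;
	err = demo_setup_b();
	if (err) {
		demo_teardown_a();	/* would be unsafe if marked __exit */
		return err;
	}
	return 0;
}

static void __exit demo_exit(void)
{
	demo_teardown_a();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");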
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 88de3aa9c5b..c04292c950f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -134,7 +134,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
V_FW_RI_RES_WR_IQANUS(0) |
V_FW_RI_RES_WR_IQANUD(1) |
F_FW_RI_RES_WR_IQANDST |
- V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids));
+ V_FW_RI_RES_WR_IQANDSTINDEX(
+ rdev->lldi.ciq_ids[cq->vector]));
res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
F_FW_RI_RES_WR_IQDROPRSS |
V_FW_RI_RES_WR_IQPCIECH(2) |
@@ -235,27 +236,21 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
struct t4_cq *cq = &chp->cq;
int idx;
struct t4_swsqe *swsqe;
- int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
- qhp->attr.state != C4IW_QP_STATE_IDLE);
if (wq->sq.flush_cidx == -1)
wq->sq.flush_cidx = wq->sq.cidx;
idx = wq->sq.flush_cidx;
BUG_ON(idx >= wq->sq.size);
while (idx != wq->sq.pidx) {
- if (error) {
- swsqe = &wq->sq.sw_sq[idx];
- BUG_ON(swsqe->flushed);
- swsqe->flushed = 1;
- insert_sq_cqe(wq, cq, swsqe);
- if (wq->sq.oldest_read == swsqe) {
- BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
- advance_oldest_read(wq);
- }
- flushed++;
- } else {
- t4_sq_consume(wq);
+ swsqe = &wq->sq.sw_sq[idx];
+ BUG_ON(swsqe->flushed);
+ swsqe->flushed = 1;
+ insert_sq_cqe(wq, cq, swsqe);
+ if (wq->sq.oldest_read == swsqe) {
+ BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+ advance_oldest_read(wq);
}
+ flushed++;
if (++idx == wq->sq.size)
idx = 0;
}
@@ -365,8 +360,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
- /*
- * drop peer2peer RTR reads.
+ /* If we have reached here because of an async
+ * event or other error, and have an egress error,
+ * then drop.
+ */
+ if (CQE_TYPE(hw_cqe) == 1)
+ goto next_cqe;
+
+ /* drop peer2peer RTR reads.
*/
if (CQE_WRID_STAG(hw_cqe) == 1)
goto next_cqe;
@@ -511,8 +512,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
*/
if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
- /*
- * If this is an unsolicited read response, then the read
+ /* If we have reached here because of an async
+ * event or other error, and have an egress error,
+ * then drop.
+ */
+ if (CQE_TYPE(hw_cqe) == 1) {
+ if (CQE_STATUS(hw_cqe))
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /* If this is an unsolicited read response, then the read
* was generated by the kernel driver as part of peer-2-peer
* connection setup. So ignore the completion.
*/
@@ -603,7 +614,7 @@ proc_cqe:
*/
if (SQ_TYPE(hw_cqe)) {
int idx = CQE_WRID_SQ_IDX(hw_cqe);
- BUG_ON(idx > wq->sq.size);
+ BUG_ON(idx >= wq->sq.size);
/*
* Account for any unsignaled completions completed by
@@ -617,7 +628,7 @@ proc_cqe:
wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
else
wq->sq.in_use -= idx - wq->sq.cidx;
- BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size);
+ BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
@@ -662,7 +673,7 @@ skip_cqe:
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
struct c4iw_qp *qhp = NULL;
- struct t4_cqe cqe = {0, 0}, *rd_cqe;
+ struct t4_cqe uninitialized_var(cqe), *rd_cqe;
struct t4_wq *wq;
u32 credit = 0;
u8 cqe_flushed;
@@ -860,6 +871,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
rhp = to_c4iw_dev(ibdev);
+ if (vector >= rhp->rdev.lldi.nciq)
+ return ERR_PTR(-EINVAL);
+
chp = kzalloc(sizeof(*chp), GFP_KERNEL);
if (!chp)
return ERR_PTR(-ENOMEM);
@@ -881,7 +895,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
/*
* Make actual HW queue 2x to avoid cidx_inc overflows.
*/
- hwentries = entries * 2;
+ hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
/*
* Make HW queue at least 64 entries so GTS updates aren't too
@@ -905,6 +919,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
}
chp->cq.size = hwentries;
chp->cq.memsize = memsize;
+ chp->cq.vector = vector;
ret = create_cq(&rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -940,7 +955,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
uresp.gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
goto err5;
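The shortened copy length is an ABI-compatibility trick: the reserved padding word (added in user.h below) keeps the struct layout explicit on all architectures, while copying sizeof(uresp) - sizeof(uresp.reserved) bytes preserves the response size an older libcxgb4 expects. A sketch of the pattern with a hypothetical response struct:

/* Sketch: exclude trailing explicit padding from the udata copy so
 * the on-the-wire size is unchanged for older user libraries.
 */
#include <rdma/ib_verbs.h>

struct demo_create_cq_resp {
	__u64 key;
	__u32 cqid;
	__u32 reserved;		/* explicit padding, never copied out */
};

static int demo_copy_resp(struct ib_udata *udata,
			  struct demo_create_cq_resp *resp)
{
	return ib_copy_to_udata(udata, resp,
				sizeof(*resp) - sizeof(resp->reserved));
}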
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab56..7db82b24302 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -64,6 +64,10 @@ struct uld_ctx {
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
+#define DB_FC_RESUME_SIZE 64
+#define DB_FC_RESUME_DELAY 1
+#define DB_FC_DRAIN_THRESH 0
+
static struct dentry *c4iw_debugfs_root;
struct c4iw_debugfs_data {
@@ -73,6 +77,16 @@ struct c4iw_debugfs_data {
int pos;
};
+/* registered cxgb4 netlink callbacks */
+static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
static int count_idrs(int id, void *p, void *data)
{
int *countp = data;
@@ -109,35 +123,49 @@ static int dump_qp(int id, void *p, void *data)
&qp->ep->com.local_addr;
struct sockaddr_in *rsin = (struct sockaddr_in *)
&qp->ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_remote_addr;
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
- "%pI4:%u->%pI4:%u\n",
+ "%pI4:%u/%u->%pI4:%u/%u\n",
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&lsin->sin_addr, ntohs(lsin->sin_port),
- &rsin->sin_addr, ntohs(rsin->sin_port));
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&qp->ep->com.local_addr;
struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
&qp->ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_remote_addr;
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
- "%pI6:%u->%pI6:%u\n",
+ "%pI6:%u/%u->%pI6:%u/%u\n",
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&lsin6->sin6_addr,
ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port),
&rsin6->sin6_addr,
- ntohs(rsin6->sin6_port));
+ ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
}
} else
cc = snprintf(qpd->buf + qpd->pos, space,
@@ -282,7 +310,7 @@ static const struct file_operations stag_debugfs_fops = {
.llseek = default_llseek,
};
-static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
static int stats_show(struct seq_file *seq, void *v)
{
@@ -311,9 +339,10 @@ static int stats_show(struct seq_file *seq, void *v)
seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
- seq_printf(seq, " DB State: %s Transitions %llu\n",
+ seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
db_state_str[dev->db_state],
- dev->rdev.stats.db_state_transitions);
+ dev->rdev.stats.db_state_transitions,
+ dev->rdev.stats.db_fc_interruptions);
seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
dev->rdev.stats.act_ofld_conn_fails);
@@ -381,31 +410,43 @@ static int dump_ep(int id, void *p, void *data)
&ep->com.local_addr;
struct sockaddr_in *rsin = (struct sockaddr_in *)
&ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
- "%pI4:%d <-> %pI4:%d\n",
+ "%pI4:%d/%d <-> %pI4:%d/%d\n",
ep, ep->com.cm_id, ep->com.qp,
(int)ep->com.state, ep->com.flags,
ep->com.history, ep->hwtid, ep->atid,
&lsin->sin_addr, ntohs(lsin->sin_port),
- &rsin->sin_addr, ntohs(rsin->sin_port));
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.local_addr;
struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
&ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
- "%pI6:%d <-> %pI6:%d\n",
+ "%pI6:%d/%d <-> %pI6:%d/%d\n",
ep, ep->com.cm_id, ep->com.qp,
(int)ep->com.state, ep->com.flags,
ep->com.history, ep->hwtid, ep->atid,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+ ntohs(mapped_lsin6->sin6_port),
+ &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -426,23 +467,29 @@ static int dump_listen_ep(int id, void *p, void *data)
if (ep->com.local_addr.ss_family == AF_INET) {
struct sockaddr_in *lsin = (struct sockaddr_in *)
&ep->com.local_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p state %d flags 0x%lx stid %d "
- "backlog %d %pI4:%d\n",
+ "backlog %d %pI4:%d/%d\n",
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
- &lsin->sin_addr, ntohs(lsin->sin_port));
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ ntohs(mapped_lsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.local_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p state %d flags 0x%lx stid %d "
- "backlog %d %pI6:%d\n",
+ "backlog %d %pI6:%d/%d\n",
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
- &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -602,10 +649,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.vr->qp.size,
rdev->lldi.vr->cq.start,
rdev->lldi.vr->cq.size);
- PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+ PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
"qpmask 0x%x cqshift %lu cqmask 0x%x\n",
(unsigned)pci_resource_len(rdev->lldi.pdev, 2),
- (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+ (u64)pci_resource_start(rdev->lldi.pdev, 2),
rdev->lldi.db_reg,
rdev->lldi.gts_reg,
rdev->qpshift, rdev->qpmask,
@@ -643,6 +690,13 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
goto err4;
}
+ rdev->status_page = (struct t4_dev_status_page *)
+ __get_free_page(GFP_KERNEL);
+ if (!rdev->status_page) {
+ pr_err(MOD "error allocating status page\n");
+ goto err4;
+ }
+ rdev->status_page->db_off = 0;
return 0;
err4:
c4iw_rqtpool_destroy(rdev);
@@ -656,6 +710,7 @@ err1:
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
+ free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
c4iw_destroy_resource(&rdev->resource);
@@ -670,7 +725,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
- iounmap(ctx->dev->rdev.oc_mw_kva);
+ if (ctx->dev->rdev.bar2_kva)
+ iounmap(ctx->dev->rdev.bar2_kva);
+ if (ctx->dev->rdev.oc_mw_kva)
+ iounmap(ctx->dev->rdev.oc_mw_kva);
ib_dealloc_device(&ctx->dev->ibdev);
ctx->dev = NULL;
}
@@ -703,18 +761,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
pr_info("%s: On-Chip Queues not supported on this device.\n",
pci_name(infop->pdev));
- if (!is_t4(infop->adapter_type)) {
- if (!allow_db_fc_on_t5) {
- db_fc_threshold = 100000;
- pr_info("DB Flow Control Disabled.\n");
- }
-
- if (!allow_db_coalescing_on_t5) {
- db_coalescing_threshold = -1;
- pr_info("DB Coalescing Disabled.\n");
- }
- }
-
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
if (!devp) {
printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -722,11 +768,33 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
}
devp->rdev.lldi = *infop;
- devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
- (pci_resource_len(devp->rdev.lldi.pdev, 2) -
- roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
- devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
- devp->rdev.lldi.vr->ocq.size);
+ /*
+ * For T5 devices, we map all of BAR2 with WC.
+ * For T4 devices with onchip qp mem, we map only that part
+ * of BAR2 with WC.
+ */
+ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+ if (is_t5(devp->rdev.lldi.adapter_type)) {
+ devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+ pci_resource_len(devp->rdev.lldi.pdev, 2));
+ if (!devp->rdev.bar2_kva) {
+ pr_err(MOD "Unable to ioremap BAR2\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ } else if (ocqp_supported(infop)) {
+ devp->rdev.oc_mw_pa =
+ pci_resource_start(devp->rdev.lldi.pdev, 2) +
+ pci_resource_len(devp->rdev.lldi.pdev, 2) -
+ roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+ devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+ devp->rdev.lldi.vr->ocq.size);
+ if (!devp->rdev.oc_mw_kva) {
+ pr_err(MOD "Unable to ioremap onchip mem\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ }
PDBG(KERN_INFO MOD "ocq memory: "
"hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
@@ -749,6 +817,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
spin_lock_init(&devp->lock);
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
+ INIT_LIST_HEAD(&devp->db_fc_list);
if (c4iw_debugfs_root) {
devp->debugfs_root = debugfs_create_dir(
@@ -756,6 +825,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
c4iw_debugfs_root);
setup_debugfs(devp);
}
+
+
return devp;
}
@@ -897,11 +968,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
}
opcode = *(u8 *)rsp;
- if (c4iw_handlers[opcode])
+ if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
- else
+ } else {
pr_info("%s no handler opcode 0x%x...\n", __func__,
opcode);
+ kfree_skb(skb);
+ }
return 0;
nomem:
@@ -977,13 +1050,16 @@ static int disable_qp_db(int id, void *p, void *data)
static void stop_queues(struct uld_ctx *ctx)
{
- spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->db_state == NORMAL) {
- ctx->dev->rdev.stats.db_state_transitions++;
- ctx->dev->db_state = FLOW_CONTROL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->dev->lock, flags);
+ ctx->dev->rdev.stats.db_state_transitions++;
+ ctx->dev->db_state = STOPPED;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
- }
- spin_unlock_irq(&ctx->dev->lock);
+ else
+ ctx->dev->rdev.status_page->db_off = 1;
+ spin_unlock_irqrestore(&ctx->dev->lock, flags);
}
static int enable_qp_db(int id, void *p, void *data)
@@ -994,15 +1070,72 @@ static int enable_qp_db(int id, void *p, void *data)
return 0;
}
+static void resume_rc_qp(struct c4iw_qp *qp)
+{
+ spin_lock(&qp->lock);
+ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.sq.wq_pidx_inc = 0;
+ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+}
+
+static void resume_a_chunk(struct uld_ctx *ctx)
+{
+ int i;
+ struct c4iw_qp *qp;
+
+ for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
+ qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
+ db_fc_entry);
+ list_del_init(&qp->db_fc_entry);
+ resume_rc_qp(qp);
+ if (list_empty(&ctx->dev->db_fc_list))
+ break;
+ }
+}
+
static void resume_queues(struct uld_ctx *ctx)
{
spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->qpcnt <= db_fc_threshold &&
- ctx->dev->db_state == FLOW_CONTROL) {
- ctx->dev->db_state = NORMAL;
- ctx->dev->rdev.stats.db_state_transitions++;
- idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+ if (ctx->dev->db_state != STOPPED)
+ goto out;
+ ctx->dev->db_state = FLOW_CONTROL;
+ while (1) {
+ if (list_empty(&ctx->dev->db_fc_list)) {
+ WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
+ ctx->dev->db_state = NORMAL;
+ ctx->dev->rdev.stats.db_state_transitions++;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+ idr_for_each(&ctx->dev->qpidr, enable_qp_db,
+ NULL);
+ } else {
+ ctx->dev->rdev.status_page->db_off = 0;
+ }
+ break;
+ } else {
+ if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
+ < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
+ DB_FC_DRAIN_THRESH)) {
+ resume_a_chunk(ctx);
+ }
+ if (!list_empty(&ctx->dev->db_fc_list)) {
+ spin_unlock_irq(&ctx->dev->lock);
+ if (DB_FC_RESUME_DELAY) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(DB_FC_RESUME_DELAY);
+ }
+ spin_lock_irq(&ctx->dev->lock);
+ if (ctx->dev->db_state != FLOW_CONTROL)
+ break;
+ }
+ }
}
+out:
+ if (ctx->dev->db_state != NORMAL)
+ ctx->dev->rdev.stats.db_fc_interruptions++;
spin_unlock_irq(&ctx->dev->lock);
}
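resume_queues() drains the deferred-doorbell list in DB_FC_RESUME_SIZE chunks, rings only while the hardware doorbell FIFO is below its interrupt threshold, and sleeps DB_FC_RESUME_DELAY jiffies between chunks with the lock dropped so posters can make progress. A compressed sketch of that pacing loop, with hypothetical demo_* helpers standing in for the FIFO check and per-QP resume:

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

#define DEMO_CHUNK 64
#define DEMO_DELAY 1	/* jiffies between chunks */

/* resume_one() must list_del_init() the entry it is handed. */
static void demo_drain(spinlock_t *lock, struct list_head *head,
		       bool (*fifo_has_room)(void),
		       void (*resume_one)(struct list_head *))
{
	spin_lock_irq(lock);
	while (!list_empty(head)) {
		int i;

		if (fifo_has_room())
			for (i = 0; i < DEMO_CHUNK && !list_empty(head); i++)
				resume_one(head->next);
		if (list_empty(head))
			break;
		/* sleep with the lock dropped, then re-check state */
		spin_unlock_irq(lock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(DEMO_DELAY);
		spin_lock_irq(lock);
	}
	spin_unlock_irq(lock);
}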
@@ -1028,12 +1161,12 @@ static int count_qps(int id, void *p, void *data)
return 0;
}
-static void deref_qps(struct qp_list qp_list)
+static void deref_qps(struct qp_list *qp_list)
{
int idx;
- for (idx = 0; idx < qp_list.idx; idx++)
- c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
+ for (idx = 0; idx < qp_list->idx; idx++)
+ c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
}
static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
@@ -1044,17 +1177,22 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
for (idx = 0; idx < qp_list->idx; idx++) {
struct c4iw_qp *qp = qp_list->qps[idx];
+ spin_lock_irq(&qp->rhp->lock);
+ spin_lock(&qp->lock);
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
qp->wq.sq.qid,
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - "
+ pr_err(MOD "%s: Fatal error - "
"DB overflow recovery failed - "
"error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
return;
}
+ qp->wq.sq.wq_pidx_inc = 0;
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
qp->wq.rq.qid,
@@ -1062,12 +1200,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - "
+ pr_err(MOD "%s: Fatal error - "
"DB overflow recovery failed - "
"error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
return;
}
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
/* Wait for the dbfifo to drain */
while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
@@ -1083,36 +1226,22 @@ static void recover_queues(struct uld_ctx *ctx)
struct qp_list qp_list;
int ret;
- /* lock out kernel db ringers */
- mutex_lock(&ctx->dev->db_mutex);
-
- /* put all queues in to recovery mode */
- spin_lock_irq(&ctx->dev->lock);
- ctx->dev->db_state = RECOVERY;
- ctx->dev->rdev.stats.db_state_transitions++;
- idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
- spin_unlock_irq(&ctx->dev->lock);
-
/* slow everybody down */
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(1000));
- /* Wait for the dbfifo to completely drain. */
- while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(10));
- }
-
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
- goto out;
+ return;
}
/* Count active queues so we can build a list of queues to recover */
spin_lock_irq(&ctx->dev->lock);
+ WARN_ON(ctx->dev->db_state != STOPPED);
+ ctx->dev->db_state = RECOVERY;
idr_for_each(&ctx->dev->qpidr, count_qps, &count);
qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
@@ -1120,7 +1249,7 @@ static void recover_queues(struct uld_ctx *ctx)
printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
spin_unlock_irq(&ctx->dev->lock);
- goto out;
+ return;
}
qp_list.idx = 0;
@@ -1133,29 +1262,13 @@ static void recover_queues(struct uld_ctx *ctx)
recover_lost_dbs(ctx, &qp_list);
/* we're almost done! deref the qps and clean up */
- deref_qps(qp_list);
+ deref_qps(&qp_list);
kfree(qp_list.qps);
- /* Wait for the dbfifo to completely drain again */
- while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(10));
- }
-
- /* resume the queues */
spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->qpcnt > db_fc_threshold)
- ctx->dev->db_state = FLOW_CONTROL;
- else {
- ctx->dev->db_state = NORMAL;
- idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
- }
- ctx->dev->rdev.stats.db_state_transitions++;
+ WARN_ON(ctx->dev->db_state != RECOVERY);
+ ctx->dev->db_state = STOPPED;
spin_unlock_irq(&ctx->dev->lock);
-
-out:
- /* start up kernel db ringers again */
- mutex_unlock(&ctx->dev->db_mutex);
}
static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -1165,9 +1278,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
switch (control) {
case CXGB4_CONTROL_DB_FULL:
stop_queues(ctx);
- mutex_lock(&ctx->dev->rdev.stats.lock);
ctx->dev->rdev.stats.db_full++;
- mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
case CXGB4_CONTROL_DB_EMPTY:
resume_queues(ctx);
@@ -1210,6 +1321,20 @@ static int __init c4iw_init_module(void)
printk(KERN_WARNING MOD
"could not create debugfs entry, continuing\n");
+ if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
+ c4iw_nl_cb_table))
+ pr_err("%s[%u]: Failed to add netlink callback\n"
+ , __func__, __LINE__);
+
+ err = iwpm_init(RDMA_NL_C4IW);
+ if (err) {
+ pr_err("port mapper initialization failed with %d\n", err);
+ ibnl_remove_client(RDMA_NL_C4IW);
+ c4iw_cm_term();
+ debugfs_remove_recursive(c4iw_debugfs_root);
+ return err;
+ }
+
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
return 0;
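The netlink wiring above is table-driven: c4iw_nl_cb_table (declared earlier in this file) indexes .dump callbacks by the RDMA_NL_IWPM_* op codes, and ibnl_add_client() binds the whole table to this driver's client id. A trimmed sketch of the shape, assuming the iwpm callbacks exported by iw_portmap.h:

#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

/* Table sized to the full op range so sparse entries stay in bounds. */
static struct ibnl_client_cbs demo_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
	[RDMA_NL_IWPM_REG_PID]     = {.dump = iwpm_register_pid_cb},
	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
};

static int demo_register(void)
{
	/* RDMA_NL_C4IW is the client id reserved for this driver */
	return ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
			       demo_cb_table);
}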
@@ -1227,6 +1352,8 @@ static void __exit c4iw_exit_module(void)
}
mutex_unlock(&dev_mutex);
cxgb4_unregister_uld(CXGB4_ULD_RDMA);
+ iwpm_exit(RDMA_NL_C4IW);
+ ibnl_remove_client(RDMA_NL_C4IW);
c4iw_cm_term();
debugfs_remove_recursive(c4iw_debugfs_root);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 23eaeabab93..361fff7a074 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -52,6 +52,8 @@
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
#include "cxgb4.h"
#include "cxgb4_uld.h"
@@ -109,6 +111,7 @@ struct c4iw_dev_ucontext {
enum c4iw_rdev_flags {
T4_FATAL_ERROR = (1<<0),
+ T4_STATUS_PAGE_DISABLED = (1<<1),
};
struct c4iw_stat {
@@ -130,6 +133,7 @@ struct c4iw_stats {
u64 db_empty;
u64 db_drop;
u64 db_state_transitions;
+ u64 db_fc_interruptions;
u64 tcam_full;
u64 act_ofld_conn_fails;
u64 pas_ofld_conn_fails;
@@ -147,9 +151,12 @@ struct c4iw_rdev {
struct gen_pool *ocqp_pool;
u32 flags;
struct cxgb4_lld_info lldi;
+ unsigned long bar2_pa;
+ void __iomem *bar2_kva;
unsigned long oc_mw_pa;
void __iomem *oc_mw_kva;
struct c4iw_stats stats;
+ struct t4_dev_status_page *status_page;
};
static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -211,7 +218,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
enum db_state {
NORMAL = 0,
FLOW_CONTROL = 1,
- RECOVERY = 2
+ RECOVERY = 2,
+ STOPPED = 3
};
struct c4iw_dev {
@@ -225,10 +233,10 @@ struct c4iw_dev {
struct mutex db_mutex;
struct dentry *debugfs_root;
enum db_state db_state;
- int qpcnt;
struct idr hwtid_idr;
struct idr atid_idr;
struct idr stid_idr;
+ struct list_head db_fc_list;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -369,6 +377,7 @@ struct c4iw_fr_page_list {
DEFINE_DMA_UNMAP_ADDR(mapping);
dma_addr_t dma_addr;
struct c4iw_dev *dev;
+ int pll_len;
};
static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
@@ -428,10 +437,12 @@ struct c4iw_qp_attributes {
u8 ecode;
u16 sq_db_inc;
u16 rq_db_inc;
+ u8 send_term;
};
struct c4iw_qp {
struct ib_qp ibqp;
+ struct list_head db_fc_entry;
struct c4iw_dev *rhp;
struct c4iw_ep *ep;
struct c4iw_qp_attributes attr;
@@ -441,6 +452,7 @@ struct c4iw_qp {
atomic_t refcnt;
wait_queue_head_t wait;
struct timer_list timer;
+ int sq_sig_all;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -718,6 +730,7 @@ enum c4iw_ep_flags {
CLOSE_SENT = 3,
TIMEOUT = 4,
QP_REFERENCED = 5,
+ RELEASE_MAPINFO = 6,
};
enum c4iw_ep_history {
@@ -754,6 +767,8 @@ struct c4iw_ep_common {
struct mutex mutex;
struct sockaddr_storage local_addr;
struct sockaddr_storage remote_addr;
+ struct sockaddr_storage mapped_local_addr;
+ struct sockaddr_storage mapped_remote_addr;
struct c4iw_wr_wait wr_wait;
unsigned long flags;
unsigned long history;
@@ -795,7 +810,48 @@ struct c4iw_ep {
u8 retry_with_mpa_v1;
u8 tried_with_mpa_v1;
unsigned int retry_count;
-};
+ int snd_win;
+ int rcv_win;
+};
+
+static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
+ const char *msg)
+{
+
+#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
+#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
+#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
+#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
+
+ if (c4iw_debug) {
+ switch (epc->local_addr.ss_family) {
+ case AF_INET:
+ PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
+ func, msg, SINA(&epc->local_addr),
+ SINP(&epc->local_addr),
+ SINP(&epc->mapped_local_addr),
+ SINA(&epc->remote_addr),
+ SINP(&epc->remote_addr),
+ SINP(&epc->mapped_remote_addr));
+ break;
+ case AF_INET6:
+ PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
+ func, msg, SIN6A(&epc->local_addr),
+ SIN6P(&epc->local_addr),
+ SIN6P(&epc->mapped_local_addr),
+ SIN6A(&epc->remote_addr),
+ SIN6P(&epc->remote_addr),
+ SIN6P(&epc->mapped_remote_addr));
+ break;
+ default:
+ break;
+ }
+ }
+#undef SINA
+#undef SINP
+#undef SIN6A
+#undef SIN6P
+}
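print_addr() gives one place to dump both the application-visible and the mapped 4-tuple; callers pass __func__ and a short tag, as the "add_mapping/create_mapinfo" call in the listen path above does. A usage sketch for a hypothetical call site, assuming epc is already populated:

/* Hypothetical call site: log both address views on a state change. */
static void demo_log_transition(struct c4iw_ep_common *epc)
{
	print_addr(epc, __func__, "mapping updated");
}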
static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
{
@@ -852,7 +908,7 @@ int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
int c4iw_register_device(struct c4iw_dev *dev);
void c4iw_unregister_device(struct c4iw_dev *dev);
int __init c4iw_cm_init(void);
-void __exit c4iw_cm_term(void);
+void c4iw_cm_term(void);
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
struct c4iw_dev_ucontext *uctx);
void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 4cb8eb24497..ec7a2988a70 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -37,9 +37,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 1;
+int use_dsgl = 0;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -76,7 +76,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
INIT_ULPTX_WR(req, wr_len, 0, 0);
req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
(wait ? FW_WR_COMPL(1) : 0));
- req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0;
+ req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));
@@ -173,7 +173,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
return ret;
}
-int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
+ void *data)
{
u32 remain = len;
u32 dmalen;
@@ -259,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
- if (!stag_idx)
+ if (!stag_idx) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.fail++;
+ mutex_unlock(&rdev->stats.lock);
return -ENOMEM;
+ }
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.cur += 32;
if (rdev->stats.stag.cur > rdev->stats.stag.max)
@@ -678,9 +682,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -710,10 +714,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
-
+ n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
@@ -726,24 +727,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
- mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = write_pbl(&mhp->rhp->rdev,
- pages,
- mhp->attr.pbl_addr + (n << 3), i);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
@@ -903,7 +902,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
dma_unmap_addr_set(c4pl, mapping, dma_addr);
c4pl->dma_addr = dma_addr;
c4pl->dev = dev;
- c4pl->ibpl.max_page_list_len = pll_len;
+ c4pl->pll_len = pll_len;
+
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
return &c4pl->ibpl;
}
@@ -912,8 +915,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
{
struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
+
dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
- c4pl->ibpl.max_page_list_len,
+ c4pl->pll_len,
c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
kfree(c4pl);
}
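pll_len exists because the IB core owns ibpl.max_page_list_len and rewrites it after the allocator returns, so freeing by that field can hand the wrong length to dma_free_coherent(). The private copy makes alloc and free agree. A distilled sketch, with hypothetical demo_* names:

#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct demo_pbl {
	struct ib_fast_reg_page_list ibpl;	/* core may rewrite fields */
	int map_len;				/* our private copy of the DMA length */
	dma_addr_t dma_addr;
	struct device *dev;
};

static void demo_free_pbl(struct demo_pbl *p)
{
	/* free with the saved length, not ibpl.max_page_list_len */
	dma_free_coherent(p->dev, p->map_len, p->ibpl.page_list, p->dma_addr);
	kfree(p);
}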
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7e94c9a656a..b1d305338de 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -106,15 +106,57 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
+ static int warned;
+ struct c4iw_alloc_ucontext_resp uresp;
+ int ret = 0;
+ struct c4iw_mm_entry *mm = NULL;
PDBG("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
+ if (!context) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
+
+ if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
+ if (!warned++)
+ pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
+ } else {
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+ if (!mm) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ uresp.status_page_size = PAGE_SIZE;
+
+ spin_lock(&context->mmap_lock);
+ uresp.status_page_key = context->key;
+ context->key += PAGE_SIZE;
+ spin_unlock(&context->mmap_lock);
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
+ if (ret)
+ goto err_mm;
+
+ mm->key = uresp.status_page_key;
+ mm->addr = virt_to_phys(rhp->rdev.status_page);
+ mm->len = PAGE_SIZE;
+ insert_mmap(context, mm);
+ }
return &context->ibucontext;
+err_mm:
+ kfree(mm);
+err_free:
+ kfree(context);
+err:
+ return ERR_PTR(ret);
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -287,7 +329,7 @@ static int c4iw_query_device(struct ib_device *ibdev,
props->max_mr = c4iw_num_stags(&dev->rdev);
props->max_pd = T4_MAX_NUM_PD;
props->local_ca_ack_delay = 0;
- props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
+ props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
return 0;
}
@@ -458,7 +500,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.node_type = RDMA_NODE_RNIC;
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
- dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
dev->ibdev.query_device = c4iw_query_device;
dev->ibdev.query_port = c4iw_query_port;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 582936708e6..086f62f5dc9 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg;
wq->gts = rdev->lldi.gts_reg;
- if (user) {
- wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
- (wq->sq.qid << rdev->qpshift);
- wq->sq.udb &= PAGE_MASK;
- wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
- (wq->rq.qid << rdev->qpshift);
- wq->rq.udb &= PAGE_MASK;
+ if (user || is_t5(rdev->lldi.adapter_type)) {
+ u32 off;
+
+ off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
+ off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
}
wq->rdev = rdev;
wq->rq.msn = 1;
@@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+ PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
__func__, wq->sq.qid, wq->rq.qid, wq->db,
- (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+ (__force unsigned long) wq->sq.udb,
+ (__force unsigned long) wq->rq.udb);
return 0;
free_dma:
@@ -425,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
default:
return -EINVAL;
}
+ wqe->send.r3 = 0;
+ wqe->send.r4 = 0;
plen = 0;
if (wr->num_sge) {
@@ -555,7 +568,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
int rem;
- if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH)
+ if (wr->wr.fast_reg.page_list_len >
+ t4_max_fr_depth(use_dsgl))
return -EINVAL;
wqe->fr.qpbinde_to_dcacpu = 0;
@@ -638,6 +652,48 @@ void c4iw_qp_rem_ref(struct ib_qp *qp)
wake_up(&(to_c4iw_qp(qp)->wait));
}
+static void add_to_fc_list(struct list_head *head, struct list_head *entry)
+{
+ if (list_empty(entry))
+ list_add_tail(entry, head);
+}
+
+static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_sq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.sq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
+static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_rq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.rq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
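ring_kernel_sq_db()/ring_kernel_rq_db() implement the deferral: in the NORMAL state the doorbell is rung immediately; otherwise the QP is parked on the device's flow-control list and the producer-index increment is accumulated in wq_pidx_inc so resume_rc_qp() (device.c above) can ring one combined doorbell later. A distilled sketch of the accumulate-or-ring decision, with hypothetical demo_* names:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_q {
	struct list_head fc_entry;
	u16 pidx_inc;		/* deferred doorbell increments */
};

static void demo_ring_or_defer(struct demo_q *q, u16 inc, bool normal,
			       struct list_head *fc_list, spinlock_t *lock,
			       void (*ring)(struct demo_q *, u16))
{
	spin_lock(lock);
	if (normal) {
		ring(q, inc);
	} else {
		if (list_empty(&q->fc_entry))
			list_add_tail(&q->fc_entry, fc_list);
		q->pidx_inc += inc;	/* coalesced into one later ring */
	}
	spin_unlock(lock);
}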
int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -646,7 +702,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
enum fw_wr_opcodes fw_opcode = 0;
enum fw_ri_wr_flags fw_flags;
struct c4iw_qp *qhp;
- union t4_wr *wqe;
+ union t4_wr *wqe = NULL;
u32 num_wrs;
struct t4_swsqe *swsqe;
unsigned long flag;
@@ -675,7 +731,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_flags = 0;
if (wr->send_flags & IB_SEND_SOLICITED)
fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_SIGNALED)
+ if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
fw_flags |= FW_RI_COMPLETION_FLAG;
swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
switch (wr->opcode) {
@@ -736,7 +792,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
swsqe->idx = qhp->wq.sq.pidx;
swsqe->complete = 0;
- swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+ swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+ qhp->sq_sig_all;
swsqe->flushed = 0;
swsqe->wr_id = wr->wr_id;
@@ -750,9 +807,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
t4_sq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
- if (t4_wq_db_enabled(&qhp->wq))
- t4_ring_sq_db(&qhp->wq, idx);
- spin_unlock_irqrestore(&qhp->lock, flag);
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_sq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_sq_db(qhp, idx);
+ }
return err;
}
@@ -761,7 +823,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
{
int err = 0;
struct c4iw_qp *qhp;
- union t4_recv_wr *wqe;
+ union t4_recv_wr *wqe = NULL;
u32 num_wrs;
u8 len16 = 0;
unsigned long flag;
@@ -812,9 +874,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wr = wr->next;
num_wrs--;
}
- if (t4_wq_db_enabled(&qhp->wq))
- t4_ring_rq_db(&qhp->wq, idx);
- spin_unlock_irqrestore(&qhp->lock, flag);
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_rq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_rq_db(qhp, idx);
+ }
return err;
}
@@ -1200,35 +1267,6 @@ out:
return ret;
}
-/*
- * Called by the library when the qp has user dbs disabled due to
- * a DB_FULL condition. This function will single-thread all user
- * DB rings to avoid overflowing the hw db-fifo.
- */
-static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
-{
- int delay = db_delay_usecs;
-
- mutex_lock(&qhp->rhp->db_mutex);
- do {
-
- /*
- * The interrupt threshold is dbfifo_int_thresh << 6. So
- * make sure we don't cross that and generate an interrupt.
- */
- if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
- (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
- writel(QID(qid) | PIDX(inc), qhp->wq.db);
- break;
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(delay));
- delay = min(delay << 1, 2000);
- } while (1);
- mutex_unlock(&qhp->rhp->db_mutex);
- return 0;
-}
-
int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
enum c4iw_qp_attr_mask mask,
struct c4iw_qp_attributes *attrs,
@@ -1278,11 +1316,11 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
}
if (mask & C4IW_QP_ATTR_SQ_DB) {
- ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+ ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
goto out;
}
if (mask & C4IW_QP_ATTR_RQ_DB) {
- ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+ ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
goto out;
}
@@ -1332,6 +1370,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
if (!internal) {
@@ -1339,30 +1378,30 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
disconnect = 1;
c4iw_get_ep(&qhp->ep->com);
}
- t4_set_wq_in_error(&qhp->wq);
ret = rdma_fini(rhp, qhp, ep);
if (ret)
goto err;
break;
case C4IW_QP_STATE_TERMINATE:
+ t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_TERMINATE);
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
- t4_set_wq_in_error(&qhp->wq);
ep = qhp->ep;
- disconnect = 1;
- if (!internal)
+ if (!internal) {
+ c4iw_get_ep(&qhp->ep->com);
terminate = 1;
- else {
+ disconnect = 1;
+ } else {
+ terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
if (ret)
goto err;
}
- c4iw_get_ep(&qhp->ep->com);
break;
case C4IW_QP_STATE_ERROR:
- set_state(qhp, C4IW_QP_STATE_ERROR);
t4_set_wq_in_error(&qhp->wq);
+ set_state(qhp, C4IW_QP_STATE_ERROR);
if (!internal) {
abort = 1;
disconnect = 1;
@@ -1465,14 +1504,6 @@ out:
return ret;
}
-static int enable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_enable_wq_db(&qp->wq);
- return 0;
-}
-
int c4iw_destroy_qp(struct ib_qp *ib_qp)
{
struct c4iw_dev *rhp;
@@ -1490,22 +1521,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
wait_event(qhp->wait, !qhp->ep);
- spin_lock_irq(&rhp->lock);
- remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- rhp->qpcnt--;
- BUG_ON(rhp->qpcnt < 0);
- if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
- rhp->rdev.stats.db_state_transitions++;
- rhp->db_state = NORMAL;
- idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
- }
- if (db_coalescing_threshold >= 0)
- if (rhp->qpcnt <= db_coalescing_threshold)
- cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]);
- spin_unlock_irq(&rhp->lock);
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
atomic_dec(&qhp->refcnt);
wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+ spin_lock_irq(&rhp->lock);
+ if (!list_empty(&qhp->db_fc_entry))
+ list_del_init(&qhp->db_fc_entry);
+ spin_unlock_irq(&rhp->lock);
+
ucontext = ib_qp->uobject ?
to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
destroy_qp(&rhp->rdev, &qhp->wq,
@@ -1516,14 +1540,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
return 0;
}
-static int disable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_disable_wq_db(&qp->wq);
- return 0;
-}
-
struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct ib_udata *udata)
{
@@ -1533,7 +1549,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_cq *schp;
struct c4iw_cq *rchp;
struct c4iw_create_qp_resp uresp;
- int sqsize, rqsize;
+ unsigned int sqsize, rqsize;
struct c4iw_ucontext *ucontext;
int ret;
struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
@@ -1605,25 +1621,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.enable_bind = 1;
qhp->attr.max_ord = 1;
qhp->attr.max_ird = 1;
+ qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
- spin_lock_irq(&rhp->lock);
- if (rhp->db_state != NORMAL)
- t4_disable_wq_db(&qhp->wq);
- rhp->qpcnt++;
- if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
- rhp->rdev.stats.db_state_transitions++;
- rhp->db_state = FLOW_CONTROL;
- idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
- }
- if (db_coalescing_threshold >= 0)
- if (rhp->qpcnt > db_coalescing_threshold)
- cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]);
- ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
- spin_unlock_irq(&rhp->lock);
+ ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
goto err2;
@@ -1692,11 +1696,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
insert_mmap(ucontext, mm2);
mm3->key = uresp.sq_db_gts_key;
- mm3->addr = qhp->wq.sq.udb;
+ mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
mm3->len = PAGE_SIZE;
insert_mmap(ucontext, mm3);
mm4->key = uresp.rq_db_gts_key;
- mm4->addr = qhp->wq.rq.udb;
+ mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
mm4->len = PAGE_SIZE;
insert_mmap(ucontext, mm4);
if (mm5) {
@@ -1709,6 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
}
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
+ INIT_LIST_HEAD(&qhp->db_fc_entry);
PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n",
__func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
qhp->wq.sq.qid);
@@ -1772,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
/*
* Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
* ringing the queue db when we're in DB_FULL mode.
+ * Only allow this on T4 devices.
*/
attrs.sq_db_inc = attr->sq_psn;
attrs.rq_db_inc = attr->rq_psn;
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+ if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+ (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
+ return -EINVAL;
return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index cdef4d7fb6d..67df71a7012 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
kfree(entry);
} else {
qid = c4iw_get_resource(&rdev->resource.qid_table);
- if (!qid)
+ if (!qid) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.fail++;
+ mutex_unlock(&rdev->stats.lock);
goto out;
+ }
mutex_lock(&rdev->stats.lock);
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
@@ -322,8 +326,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
if (!addr)
- printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n",
- pci_name(rdev->lldi.pdev));
+ pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e73ace73918..68b0a6bf4eb 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -84,7 +84,14 @@ struct t4_status_page {
sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
sizeof(struct fw_ri_immd)) & ~31UL)
-#define T4_MAX_FR_DEPTH (1024 / sizeof(u64))
+#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
+#define T4_MAX_FR_DSGL 1024
+#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64))
+
+static inline int t4_max_fr_depth(int use_dsgl)
+{
+ return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH;
+}
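t4_max_fr_depth() makes the fast-register depth limit a function of the DSGL mode instead of a fixed constant; both the device-capability report (provider.c above) and the WR builder (qp.c above) call it with the same use_dsgl module parameter, so the advertised and enforced limits cannot disagree. A usage sketch:

/* Usage sketch: one limit, two consumers. */
#include <linux/errno.h>

static int demo_check_fr(int page_list_len, int use_dsgl)
{
	if (page_list_len > t4_max_fr_depth(use_dsgl))
		return -EINVAL;	/* reject over-deep fast-register WRs */
	return 0;
}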
#define T4_RQ_NUM_SLOTS 2
#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
@@ -292,7 +299,7 @@ struct t4_sq {
unsigned long phys_addr;
struct t4_swsqe *sw_sq;
struct t4_swsqe *oldest_read;
- u64 udb;
+ u64 __iomem *udb;
size_t memsize;
u32 qid;
u16 in_use;
@@ -300,6 +307,7 @@ struct t4_sq {
u16 cidx;
u16 pidx;
u16 wq_pidx;
+ u16 wq_pidx_inc;
u16 flags;
short flush_cidx;
};
@@ -313,7 +321,7 @@ struct t4_rq {
dma_addr_t dma_addr;
DEFINE_DMA_UNMAP_ADDR(mapping);
struct t4_swrqe *sw_rq;
- u64 udb;
+ u64 __iomem *udb;
size_t memsize;
u32 qid;
u32 msn;
@@ -324,6 +332,7 @@ struct t4_rq {
u16 cidx;
u16 pidx;
u16 wq_pidx;
+ u16 wq_pidx_inc;
};
struct t4_wq {
@@ -433,15 +442,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
return wq->sq.size * T4_SQ_NUM_SLOTS;
}
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+/* This function copies a 64-byte coalesced work request to the
+ * memory-mapped BAR2 space. For coalesced WRs, the SGE fetches data
+ * from the FIFO instead of from the host.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+ int count = 8;
+
+ while (count) {
+ writeq(*src, dst);
+ src++;
+ dst++;
+ count--;
+ }
+}
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_wr *wqe)
{
+
+ /* Flush host queue memory writes. */
wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ pio_copy(wq->sq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ writel(PIDX_T5(inc), wq->sq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
}
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_recv_wr *wqe)
{
+
+ /* Flush host queue memory writes. */
wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ pio_copy(wq->rq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ writel(PIDX_T5(inc), wq->rq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
}
@@ -481,6 +542,7 @@ struct t4_cq {
size_t memsize;
__be64 bits_type_ts;
u32 cqid;
+ int vector;
u16 size; /* including status page */
u16 cidx;
u16 sw_pidx;
@@ -566,6 +628,9 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
+
+ /* Ensure CQE is flushed to memory */
+ rmb();
*cqe = &cq->queue[cq->cidx];
ret = 0;
} else
@@ -609,3 +674,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
}
#endif
+
+struct t4_dev_status_page {
+ u8 db_off;
+};
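This one-byte structure occupies a whole page that c4iw_rdev_open() allocates and c4iw_alloc_ucontext() exposes read-only to userspace via mmap; the library is expected to test db_off before each user-mode doorbell. A hypothetical userspace-side sketch (not libcxgb4 source), assuming the page is already mapped:

#include <stdint.h>

struct demo_status_page {
	uint8_t db_off;		/* mirrors t4_dev_status_page.db_off */
};

/* Returns 0 if the doorbell was rung, -1 if the kernel asked us to wait. */
static int demo_ring_udb(volatile struct demo_status_page *sp,
			 void (*ring)(void))
{
	if (sp->db_off)
		return -1;	/* flow control engaged: caller retries */
	ring();
	return 0;
}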
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index dc193c29267..91289a051af 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -836,4 +836,19 @@ struct ulptx_idata {
#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE)
#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U)
+enum { /* TCP congestion control algorithms */
+ CONG_ALG_RENO,
+ CONG_ALG_TAHOE,
+ CONG_ALG_NEWRENO,
+ CONG_ALG_HIGHSPEED
+};
+
+#define S_CONG_CNTRL 14
+#define M_CONG_CNTRL 0x3
+#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
+#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
+
+#define CONG_CNTRL_VALID (1 << 18)
+#define T5_OPT_2_VALID (1 << 31)
+
#endif /* _T4FW_RI_API_H_ */
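The S_/M_/V_/G_ macro quartet above is this driver's standard pattern for a packed field: S_ is the shift, M_ the width mask, V_ builds a value in place, and G_ extracts it back out; CONG_CNTRL_VALID then flags the field as meaningful in the option word. A worked sketch, assuming the macros above are in scope:

/* Sketch: set and read back the 2-bit congestion-control field. */
#include <linux/types.h>

static u32 demo_set_cong(u32 opt2, unsigned int alg)
{
	opt2 &= ~V_CONG_CNTRL(M_CONG_CNTRL);	/* clear the old field */
	opt2 |= V_CONG_CNTRL(alg) | CONG_CNTRL_VALID;
	return opt2;
}

static unsigned int demo_get_cong(u32 opt2)
{
	return G_CONG_CNTRL(opt2);		/* (opt2 >> 14) & 0x3 */
}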
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index 32b754c35ab..cbd0ce17072 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -48,6 +48,7 @@ struct c4iw_create_cq_resp {
__u32 cqid;
__u32 size;
__u32 qid_mask;
+ __u32 reserved; /* explicit padding (optional for i386) */
};
@@ -70,4 +71,10 @@ struct c4iw_create_qp_resp {
__u32 qid_mask;
__u32 flags;
};
+
+struct c4iw_alloc_ucontext_resp {
+ __u64 status_page_key;
+ __u32 status_page_size;
+ __u32 reserved; /* explicit padding (optional for i386) */
+};
#endif
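
The new alloc_ucontext response lets a user-space provider map the kernel's status page and honor db_off before ringing user doorbells. A minimal sketch, assuming the usual cxgb4 convention that status_page_key is an mmap offset (the helper name is hypothetical):

    #include <sys/mman.h>

    /* Map the status page described by 'resp' and return its db_off flag;
     * non-zero means user doorbell writes must be held off. */
    static int example_db_off(int fd, const struct c4iw_alloc_ucontext_resp *resp)
    {
            volatile struct t4_dev_status_page *sp;

            sp = mmap(NULL, resp->status_page_size, PROT_READ, MAP_SHARED,
                      fd, resp->status_page_key);
            if (sp == MAP_FAILED)
                    return -1;
            return sp->db_off;
    }
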
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f08f6eaf3fa..bd45e0f3923 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -322,7 +322,7 @@ struct ehca_mr_pginfo {
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
+ struct scatterlist *next_sg;
u64 next_nmap;
} usr;
struct { /* type EHCA_MR_PGI_FMR section */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 212150c25ea..8cc83753776 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
+ cq = ERR_PTR(-EFAULT);
goto create_cq_exit4;
}
}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index bcfb0c18362..3488e8c9fcb 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -400,10 +400,7 @@ reg_user_mr_fallback:
pginfo.num_hwpages = num_hwpages;
pginfo.u.usr.region = e_mr->umem;
pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
- pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
- (&e_mr->umem->chunk_list),
- list);
-
+ pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
&e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
@@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr;
- u32 i = 0;
u32 j = 0;
int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT ;
- *kpage = pgaddr + (pginfo->next_hwpage *
- pginfo->hwpage_size);
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%llx "
- "chunk->page_list[i]=%llx "
- "i=%x next_hwpage=%llx",
- pgaddr, (u64)sg_dma_address(
- &chunk->page_list[i]),
- i, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- kpage++;
- if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
- (pginfo->kpage_cnt)++;
- (pginfo->u.usr.next_nmap)++;
- pginfo->next_hwpage = 0;
- i++;
- }
- j++;
- if (j >= number) break;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
+ pgaddr = page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT;
+ *kpage = pgaddr + (pginfo->next_hwpage *
+ pginfo->hwpage_size);
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx "
+ "sg_dma_address=%llx "
+ "entry=%llx next_hwpage=%llx",
+ pgaddr, (u64)sg_dma_address(*sg),
+ pginfo->u.usr.next_nmap,
+ pginfo->next_hwpage);
+ return -EFAULT;
}
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ *sg = sg_next(*sg);
+ }
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
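
The rewrite above replaces the removed ib_umem chunk list with a flat scatterlist walk. The underlying pattern, reduced to a sketch (use_page() is a hypothetical consumer):

    struct scatterlist *sg;
    int i;

    /* Visit each mapped page of a pinned umem via its scatterlist. */
    for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
            use_page(page_to_pfn(sg_page(sg)) << PAGE_SHIFT);
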
@@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
* check given pages for contiguous layout
* last page addr is returned in prev_pgaddr for further check
*/
-static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
- int start_idx, int end_idx,
+static int ehca_check_kpages_per_ate(struct scatterlist **sg,
+ int num_pages,
u64 *prev_pgaddr)
{
- int t;
- for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
+ for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
+ u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
if (ehca_debug_level >= 3)
ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
*(u64 *)__va(pgaddr));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
ehca_gen_err("uncontiguous page found pgaddr=%llx "
- "prev_pgaddr=%llx page_list_i=%x",
- pgaddr, *prev_pgaddr, t);
+ "prev_pgaddr=%llx entries_left_in_hwpage=%x",
+ pgaddr, *prev_pgaddr, num_pages);
return -EINVAL;
}
*prev_pgaddr = pgaddr;
@@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr, prev_pgaddr;
- u32 i = 0;
u32 j = 0;
int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
int nr_kpages = kpages_per_hwpage;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT );
- *kpage = pgaddr;
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%llx i=%x",
- pgaddr, i);
+ if (nr_kpages == kpages_per_hwpage) {
+ pgaddr = (page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT);
+ *kpage = pgaddr;
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx entry=%llx",
+ pgaddr, pginfo->u.usr.next_nmap);
+ ret = -EFAULT;
+ return ret;
+ }
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%llx entry=%llx "
+ "mr_pgsize=%llx",
+ pgaddr, pginfo->u.usr.next_nmap,
+ pginfo->hwpage_size);
ret = -EFAULT;
return ret;
}
- /*
- * The first page in a hwpage must be aligned;
- * the first MR page is exempt from this rule.
- */
- if (pgaddr & (pginfo->hwpage_size - 1)) {
- if (pginfo->hwpage_cnt) {
- ehca_gen_err(
- "invalid alignment "
- "pgaddr=%llx i=%x "
- "mr_pgsize=%llx",
- pgaddr, i,
- pginfo->hwpage_size);
- ret = -EFAULT;
- return ret;
- }
- /* first MR page */
- pginfo->kpage_cnt =
- (pgaddr &
- (pginfo->hwpage_size - 1)) >>
- PAGE_SHIFT;
- nr_kpages -= pginfo->kpage_cnt;
- *kpage = pgaddr &
- ~(pginfo->hwpage_size - 1);
- }
- if (ehca_debug_level >= 3) {
- u64 val = *(u64 *)__va(pgaddr);
- ehca_gen_dbg("kpage=%llx chunk_page=%llx "
- "value=%016llx",
- *kpage, pgaddr, val);
- }
- prev_pgaddr = pgaddr;
- i++;
- pginfo->kpage_cnt++;
- pginfo->u.usr.next_nmap++;
- nr_kpages--;
- if (!nr_kpages)
- goto next_kpage;
- continue;
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = pgaddr &
+ ~(pginfo->hwpage_size - 1);
}
- if (i + nr_kpages > chunk->nmap) {
- ret = ehca_check_kpages_per_ate(
- chunk->page_list, i,
- chunk->nmap - 1, &prev_pgaddr);
- if (ret) return ret;
- pginfo->kpage_cnt += chunk->nmap - i;
- pginfo->u.usr.next_nmap += chunk->nmap - i;
- nr_kpages -= chunk->nmap - i;
- break;
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)__va(pgaddr);
+ ehca_gen_dbg("kpage=%llx page=%llx "
+ "value=%016llx",
+ *kpage, pgaddr, val);
}
+ prev_pgaddr = pgaddr;
+ *sg = sg_next(*sg);
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
+ }
+
+ ret = ehca_check_kpages_per_ate(sg, nr_kpages,
+ &prev_pgaddr);
+ if (ret)
+ return ret;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
- ret = ehca_check_kpages_per_ate(chunk->page_list, i,
- i + nr_kpages - 1,
- &prev_pgaddr);
- if (ret) return ret;
- i += nr_kpages;
- pginfo->kpage_cnt += nr_kpages;
- pginfo->u.usr.next_nmap += nr_kpages;
next_kpage:
- nr_kpages = kpages_per_hwpage;
- (pginfo->hwpage_cnt)++;
- kpage++;
- j++;
- if (j >= number) break;
- }
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ nr_kpages = kpages_per_hwpage;
+ (pginfo->hwpage_cnt)++;
+ kpage++;
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
@@ -2591,16 +2534,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
/* This is only a stub; nothing to be done here */
}
-static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- return sg->dma_address;
-}
-
-static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size,
enum dma_data_direction dir)
@@ -2653,8 +2586,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
.unmap_page = ehca_dma_unmap_page,
.map_sg = ehca_dma_map_sg,
.unmap_sg = ehca_dma_unmap_sg,
- .dma_address = ehca_dma_address,
- .dma_len = ehca_dma_len,
.sync_single_for_cpu = ehca_dma_sync_single_for_cpu,
.sync_single_for_device = ehca_dma_sync_single_for_device,
.alloc_coherent = ehca_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 00d6861a6a1..2e89356c46f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1329,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
if (!smi_reset2init &&
!ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
- attr_mask)) {
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
ret = -EINVAL;
ehca_err(ibqp->device,
"Invalid qp transition new_state=%x cur_state=%x "
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 714293b7851..45802e97332 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct ipath_diag_pkt odp;
struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
@@ -335,42 +335,24 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */
- if (count < sizeof(odp)) {
- ret = -EINVAL;
- goto bail;
- }
if (count == sizeof(dp)) {
if (copy_from_user(&dp, data, sizeof(dp))) {
ret = -EFAULT;
goto bail;
}
- } else if (copy_from_user(&odp, data, sizeof(odp))) {
- ret = -EFAULT;
- goto bail;
- }
-
- /*
- * Due to padding/alignment issues (lessened with new struct)
- * the old and new structs are the same length. We need to
- * disambiguate them, which we can do because odp.len has never
- * been less than the total of LRH+BTH+DETH so far, while
- * dp.unit (same offset) unit is unlikely to get that high.
- * Similarly, dp.data, the pointer to user at the same offset
- * as odp.unit, is almost certainly at least one (512byte)page
- * "above" NULL. The if-block below can be omitted if compatibility
- * between a new driver and older diagnostic code is unimportant.
- * compatibility the other direction (new diags, old driver) is
- * handled in the diagnostic code, with a warning.
- */
- if (dp.unit >= 20 && dp.data < 512) {
- /* very probable version mismatch. Fix it up */
- memcpy(&odp, &dp, sizeof(odp));
- /* We got a legacy dp, copy elements to dp */
+ } else if (count == sizeof(odp)) {
+ if (copy_from_user(&odp, data, sizeof(odp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ dp.len = odp.len;
dp.unit = odp.unit;
dp.data = odp.data;
- dp.len = odp.len;
- dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+ dp.pbc_wd = 0;
+ } else {
+ ret = -EINVAL;
+ goto bail;
}
/* send count must be an exact number of dwords */
@@ -379,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- clen = dp.len >> 2;
+ plen = dp.len >> 2;
dd = ipath_lookup(dp.unit);
if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -422,16 +404,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
-
- if ((plen + 4) > dd->ipath_ibmaxlen) {
+ /*
+ * We need the total length before the first word is written, plus 2 dwords:
+ * one dword of padding so we get the full user data when the length is not
+ * aligned on a word boundary, and one to guarantee room for the ICRC that
+ * is appended later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
- plen - 4, dd->ipath_ibmaxlen);
+ dp.len, dd->ipath_ibmaxlen);
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -473,11 +461,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
- __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
ipath_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen);
ipath_flush_wc();
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 644c2c74e05..123a8c05353 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int ipath_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void ipath_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size,
@@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
}
struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
- ipath_mapping_error,
- ipath_dma_map_single,
- ipath_dma_unmap_single,
- ipath_dma_map_page,
- ipath_dma_unmap_page,
- ipath_map_sg,
- ipath_unmap_sg,
- ipath_sg_dma_address,
- ipath_sg_dma_len,
- ipath_sync_single_for_cpu,
- ipath_sync_single_for_device,
- ipath_dma_alloc_coherent,
- ipath_dma_free_coherent
+ .mapping_error = ipath_mapping_error,
+ .map_single = ipath_dma_map_single,
+ .unmap_single = ipath_dma_unmap_single,
+ .map_page = ipath_dma_map_page,
+ .unmap_page = ipath_dma_unmap_page,
+ .map_sg = ipath_map_sg,
+ .unmap_sg = ipath_unmap_sg,
+ .sync_single_for_cpu = ipath_sync_single_for_cpu,
+ .sync_single_for_device = ipath_sync_single_for_device,
+ .alloc_coherent = ipath_dma_alloc_coherent,
+ .free_coherent = ipath_dma_free_coherent
};
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26dfbc8ee0f..01ba792791a 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
@@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
/* likely due to cancel; so suppress message unless verbose */
if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
/* armlaunch takes precedence; it often causes both. */
ipath_cdbg(VERBOSE,
"Suppressed %s error (%llx) after sendbuf cancel\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index e346d3890a0..5e61e9bff69 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct ipath_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ int n, m, entry;
+ struct scatterlist *sg;
struct ib_mr *ret;
if (length == 0) {
@@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
-
+ n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
@@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
- void *vaddr;
-
- vaddr = page_address(sg_page(&chunk->page_list[i]));
- if (!vaddr) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
ret = &mr->ibmr;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 0857a9c3cd3..face87602dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -463,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 98ac18ec977..17a517766ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque)
/* ipath_sdma_abort() is done, waiting for interrupt */
if (status == IPATH_SDMA_ABORT_DISARMED) {
- if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+ if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
goto resched_noprint;
/* give up, intr got lost somewhere */
ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -341,7 +341,7 @@ resched:
* JAG - this is bad to just have default be a loop without
* state change
*/
- if (jiffies > dd->ipath_sdma_abort_jiffies) {
+ if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b2144..cc04b7ba348 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
int j;
int ret;
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
+ ret = get_user_pages_fast(addr, npages, 0, pages);
if (ret != npages) {
int i;
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
while (dim) {
const int mxp = 8;
- down_write(&current->mm->mmap_sem);
ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
if (ret <= 0)
goto done_unlock;
else {
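
Both hunks rely on get_user_pages_fast() handling mm locking internally, which is why the explicit mmap_sem dance is dropped. A sketch of the resulting pin/cleanup pattern (function name assumed):

    static int example_pin_pages(unsigned long addr, int npages,
                                 struct page **pages)
    {
            int ret, i;

            /* write=0: pin for read; the fast path takes mmap_sem itself. */
            ret = get_user_pages_fast(addr, npages, 0, pages);
            if (ret == npages)
                    return 0;

            /* Partial pin: release whatever was pinned and report failure. */
            for (i = 0; i < ret; i++)
                    put_page(pages[i]);
            return -ENOMEM;
    }
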
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index 24ab11a9ad1..fc01deac1d3 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,6 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
- depends on NETDEVICES && ETHERNET && PCI
+ depends on NETDEVICES && ETHERNET && PCI && INET
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index a251becdaa9..2d8c3397774 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -39,25 +39,6 @@
#include "mlx4_ib.h"
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
- u8 *mac, int *is_mcast, u8 port)
-{
- struct in6_addr in6;
-
- *is_mcast = 0;
-
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
- if (rdma_link_local_addr(&in6))
- rdma_get_ll_mac(&in6, mac);
- else if (rdma_is_multicast_addr(&in6)) {
- rdma_get_mcast_mac(&in6, mac);
- *is_mcast = 1;
- } else
- return -EINVAL;
-
- return 0;
-}
-
static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
@@ -92,21 +73,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
- union ib_gid sgid;
- u8 mac[6];
- int err;
- int is_mcast;
+ int is_mcast = 0;
+ struct in6_addr in6;
u16 vlan_tag;
- err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
- if (err)
- return ERR_PTR(err);
-
- memcpy(ah->av.eth.mac, mac, 6);
- err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
- if (err)
- return ERR_PTR(err);
- vlan_tag = rdma_get_vlan_id(&sgid);
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6)) {
+ is_mcast = 1;
+ rdma_get_mcast_mac(&in6, ah->av.eth.mac);
+ } else {
+ memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+ }
+ vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
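
For reference, the packing above keeps the 12-bit VLAN ID in the low bits and places the 3-bit SL in bits 15:13 of the same 16-bit field. A worked value with assumed inputs:

    u16 vlan_tag = 100;             /* VLAN ID 100 (< 0x1000) */
    u8 sl = 5;                      /* service level, 0..7 */

    vlan_tag |= (sl & 7) << 13;     /* 0x0064 | 0xA000 = 0xA064 */
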
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 2f215b93db6..0eb141c4141 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
- if (slave_id >= dev->dev->num_slaves)
+ if (slave_id >= dev->dev->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d1f5f1dd77b..56a593e0ae5 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -61,6 +61,11 @@ struct cm_generic_msg {
__be32 remote_comm_id;
};
+struct cm_sidr_generic_msg {
+ struct ib_mad_hdr hdr;
+ __be32 request_id;
+};
+
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
@@ -69,28 +74,62 @@ struct cm_req_msg {
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
- msg->local_comm_id = cpu_to_be32(cm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
- return be32_to_cpu(msg->local_comm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->local_comm_id);
+ }
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
- msg->remote_comm_id = cpu_to_be32(cm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
- return be32_to_cpu(msg->remote_comm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->remote_comm_id);
+ }
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
@@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
u32 sl_cm_id;
int pv_cm_id = -1;
- sl_cm_id = get_local_comm_id(mad);
-
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
- } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
return 0;
} else {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
@@ -315,14 +356,18 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
- struct ib_mad *mad)
+ struct ib_mad *mad)
{
u32 pv_cm_id;
struct id_map_entry *id;
- if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
+ if (!slave)
+ return 0;
+
gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
@@ -341,7 +386,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
return -ENOENT;
}
- *slave = id->slave_id;
+ if (slave)
+ *slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d5e60f44ba5..1066eec854a 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
- PAGE_SIZE * 2, &buf->buf);
+ PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
if (err)
goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
goto err_cq;
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
- while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+ if (entries < 1) {
err = -EINVAL;
goto out;
}
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ if (entries > dev->dev->caps.max_cqes) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
@@ -559,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
- unsigned tail, struct mlx4_cqe *cqe)
+ unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@@ -569,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
- wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
- wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
+ if (is_eth) {
+ wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
+ memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
+ memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ }
+
return 0;
}
@@ -589,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
+ int is_eth;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
@@ -773,11 +787,15 @@ repoll:
break;
}
+ is_eth = (rdma_port_get_link_layer(wc->qp->device,
+ (*cur_qp)->port) ==
+ IB_LINK_LAYER_ETHERNET);
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
- return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
+ return use_tunnel_data(*cur_qp, cq, wc, tail,
+ cqe, is_eth);
}
wc->slid = be16_to_cpu(cqe->rlid);
@@ -788,11 +806,21 @@ repoll:
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
- if (rdma_port_get_link_layer(wc->qp->device,
- (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
+ if (is_eth) {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
- else
+ if (be32_to_cpu(cqe->vlan_my_qpn) &
+ MLX4_CQE_VLAN_PRESENT_MASK) {
+ wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
+ MLX4_CQE_VID_MASK;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
+ wc->vlan_id = 0xffff;
+ }
}
return 0;
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 8aee4233b38..c5174098636 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f2a3f48107e..287ad0564ac 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int ret = 0;
u16 tun_pkey_ix;
u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
if (dest_qpt > IB_QPT_GSI)
return -EINVAL;
@@ -477,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (!dest_qpt)
tun_qp = &tun_ctx->qp[0];
else
@@ -509,6 +506,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
ah = ib_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
@@ -540,11 +541,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
- tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
- tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+ if (is_eth) {
+ u16 vlan = 0;
+ if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+ NULL)) {
+ /* VST mode */
+ if (vlan != wc->vlan_id)
+ /* Packet vlan is not the VST-assigned vlan.
+ * Drop the packet.
+ */
+ goto out;
+ else
+ /* Remove the vlan tag before forwarding
+ * the packet to the VF.
+ */
+ vlan = 0xffff;
+ } else {
+ vlan = wc->vlan_id;
+ }
+
+ tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+ memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+ memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+ } else {
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ }
+
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
@@ -580,6 +606,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
int err;
int slave;
u8 *slave_id;
+ int is_eth = rdma_port_get_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH)) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
/* Initially assume that this mad is for us */
slave = mlx4_master_func_num(dev->dev);
@@ -602,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
}
/* Class-specific handling */
switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ /* 255 indicates the dom0 */
+ if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+ if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+ return -EPERM;
+ /* for a VF, drop unsolicited MADs */
+ if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+ mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+ slave, mad->mad_hdr.mgmt_class,
+ mad->mad_hdr.method);
+ return -EINVAL;
+ }
+ }
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
(struct ib_sa_mad *) mad))
@@ -1076,8 +1152,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
- enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+ enum ib_qp_type dest_qpt, u16 pkey_index,
+ u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@@ -1099,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (dest_qpt == IB_QPT_SMI) {
src_qpnum = 0;
sqp = &sqp_ctx->qp[0];
@@ -1166,6 +1239,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
+ if (s_mac)
+ memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@@ -1174,6 +1250,22 @@ out:
return ret;
}
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+ return mlx4_get_base_gid_ix(dev->dev, slave, port);
+}
+
+static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ ah_attr->grh.sgid_index = slave;
+ else
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+}
+
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
{
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1184,6 +1276,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
struct ib_ah_attr ah_attr;
u8 *slave_id;
int slave;
+ int port;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1199,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
"belongs to another slave\n", wc->src_qp);
return;
}
- if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
- mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
- "non-master trying to send QP0 packets\n", wc->src_qp);
- return;
- }
/* Map transaction ID */
ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1231,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
/* Class-specific handling */
switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ if (slave != mlx4_master_func_num(dev->dev) &&
+ !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+ return;
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
(struct ib_sa_mad *) &tunnel->mad))
@@ -1260,12 +1354,18 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
ah.ibah.device = ctx->ib_dev;
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if ((ah_attr.ah_flags & IB_AH_GRH) &&
- (ah_attr.grh.sgid_index != slave)) {
- mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
- slave, ah_attr.grh.sgid_index);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
+
+ port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num);
+ if (port < 0)
return;
- }
+ ah_attr.port_num = port;
+ memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+ /* if the slave has a default vlan, use it */
+ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1273,7 +1373,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, &tunnel->mad);
+ &ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1657,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
return -EEXIST;
ctx->state = DEMUX_PV_STATE_STARTING;
- /* have QP0 only on port owner, and only if link layer is IB */
- if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
- rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ /* have QP0 only if link layer is IB */
+ if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+ IB_LINK_LAYER_INFINIBAND)
ctx->has_smi = 1;
if (ctx->has_smi) {
@@ -1850,7 +1950,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->port = port;
ctx->ib_dev = &dev->ib_dev;
- for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ for (i = 0;
+ i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev->dev, i);
+
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+
ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
if (ret) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d6c5a73becf..0f7027e7db1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,8 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -46,15 +48,17 @@
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "1.0"
-#define DRV_RELDATE "April 4, 2008"
+#define DRV_VERSION "2.2-1"
+#define DRV_RELDATE "Feb 2014"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -92,21 +96,27 @@ static union ib_gid zgid;
static int check_flow_steering_support(struct mlx4_dev *dev)
{
+ int eth_num_ports = 0;
int ib_num_ports = 0;
- int i;
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
- ib_num_ports++;
-
- if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
- if (ib_num_ports || mlx4_is_mfunc(dev)) {
- pr_warn("Device managed flow steering is unavailable "
- "for IB ports or in multifunction env.\n");
- return 0;
+ int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (dmfs) {
+ int i;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ eth_num_ports++;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+ dmfs &= (!ib_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+ (!eth_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+ if (ib_num_ports && mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
+ dmfs = 0;
}
- return 1;
}
- return 0;
+ return dmfs;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
@@ -165,7 +175,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
- if (check_flow_steering_support(dev->dev))
+ if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
@@ -177,18 +187,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
- props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
- props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
@@ -338,7 +348,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = IB_SPEED_QDR;
- props->port_cap_flags = IB_PORT_CM_SUP;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
@@ -526,7 +536,6 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
if (IS_ERR(mailbox))
return 0;
- memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
@@ -536,19 +545,16 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
return 0;
}
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
- u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, 256);
-
if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
@@ -557,8 +563,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
}
- err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -567,11 +573,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
struct ib_port_attr attr;
u32 cap_mask;
int err;
- mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+ /* Return OK if this is RoCE. CM calls ib_modify_port() regardless
+ * of whether the port link layer is ETH or IB. For ETH ports, qkey
+ * violations and port capabilities are not meaningful.
+ */
+ if (is_eth)
+ return 0;
+
+ mutex_lock(&mdev->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
@@ -580,9 +595,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mlx4_SET_PORT(to_mdev(ibdev), port,
- !!(mask & IB_PORT_RESET_QKEY_CNTR),
- cap_mask);
+ err = mlx4_ib_SET_PORT(mdev, port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
@@ -790,7 +805,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
- u8 mac[6];
struct net_device *ndev;
int ret = 0;
@@ -804,11 +818,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_unlock(&mdev->iboe.lock);
if (ndev) {
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- rtnl_lock();
- dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
ret = 1;
- rtnl_unlock();
dev_put(ndev);
}
@@ -822,6 +832,7 @@ struct mlx4_ib_steering {
};
static int parse_flow_attr(struct mlx4_dev *dev,
+ u32 qp_num,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
@@ -837,6 +848,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
+ case IB_FLOW_SPEC_IB:
+ type = MLX4_NET_TRANS_RULE_ID_IB;
+ mlx4_spec->ib.l3_qpn =
+ cpu_to_be32(qp_num);
+ mlx4_spec->ib.qpn_mask =
+ cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+ break;
+
case IB_FLOW_SPEC_IPV4:
type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -868,6 +887,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
return mlx4_hw_rule_sz(dev, type);
}
+struct default_rules {
+ __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u8 link_layer;
+};
+static const struct default_rules default_table[] = {
+ {
+ .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+ .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+ .rules_create_list = {IB_FLOW_SPEC_IB},
+ .link_layer = IB_LINK_LAYER_INFINIBAND
+ }
+};
+
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr)
+{
+ int i, j, k;
+ void *ib_flow;
+ const struct default_rules *pdefault_rules = default_table;
+ u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+ for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+ pdefault_rules++) {
+ __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ memset(&field_types, 0, sizeof(field_types));
+
+ if (link_layer != pdefault_rules->link_layer)
+ continue;
+
+ ib_flow = flow_attr + 1;
+ /* we assume the specs are sorted */
+ for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+ j < flow_attr->num_of_specs; k++) {
+ union ib_flow_spec *current_flow =
+ (union ib_flow_spec *)ib_flow;
+
+ /* same layer but different type */
+ if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+ (pdefault_rules->mandatory_fields[k] &
+ IB_FLOW_SPEC_LAYER_MASK)) &&
+ (current_flow->type !=
+ pdefault_rules->mandatory_fields[k]))
+ goto out;
+
+ /* same layer, try match next one */
+ if (current_flow->type ==
+ pdefault_rules->mandatory_fields[k]) {
+ j++;
+ ib_flow +=
+ ((union ib_flow_spec *)ib_flow)->size;
+ }
+ }
+
+ ib_flow = flow_attr + 1;
+ for (j = 0; j < flow_attr->num_of_specs;
+ j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+ for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+ /* same layer and same type */
+ if (((union ib_flow_spec *)ib_flow)->type ==
+ pdefault_rules->mandatory_not_fields[k])
+ goto out;
+
+ return i;
+ }
+out:
+ return -1;
+}
+
+static int __mlx4_ib_create_default_rules(
+ struct mlx4_ib_dev *mdev,
+ struct ib_qp *qp,
+ const struct default_rules *pdefault_rules,
+ struct _rule_hw *mlx4_spec) {
+ int size = 0;
+ int i;
+
+ for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+ sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ int ret;
+ union ib_flow_spec ib_spec;
+ switch (pdefault_rules->rules_create_list[i]) {
+ case 0:
+ /* no rule */
+ continue;
+ case IB_FLOW_SPEC_IB:
+ ib_spec.type = IB_FLOW_SPEC_IB;
+ ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+ break;
+ default:
+ /* invalid rule */
+ return -EINVAL;
+ }
+ /* We must supply an empty rule; the qpn is ignored here */
+ ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+ mlx4_spec);
+ if (ret < 0) {
+ pr_info("invalid parsing\n");
+ return -EINVAL;
+ }
+
+ mlx4_spec = (void *)mlx4_spec + ret;
+ size += ret;
+ }
+ return size;
+}
+
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
@@ -879,8 +1007,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
- size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
- (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+ int default_flow;
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -905,7 +1032,6 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, rule_size);
ctrl = mailbox->buf;
ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
@@ -916,8 +1042,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ /* Add default flows */
+ default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+ if (default_flow >= 0) {
+ ret = __mlx4_ib_create_default_rules(
+ mdev, qp, default_table + default_flow,
+ mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
for (i = 0; i < flow_attr->num_of_specs; i++) {
- ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+ mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
@@ -1031,6 +1170,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1042,7 +1183,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6, &reg_id);
+ prot, &reg_id);
if (err)
goto err_malloc;
@@ -1061,7 +1202,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
err_malloc:
kfree(ib_steering);
@@ -1089,10 +1230,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
u64 reg_id = 0;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1115,7 +1257,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
if (err)
return err;
@@ -1127,13 +1269,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- if (ndev) {
- rtnl_lock();
- dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
- rtnl_unlock();
+ if (ndev)
dev_put(ndev);
- }
list_del(&ge->list);
kfree(ge);
} else
@@ -1229,7 +1366,8 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
+ struct net_device *dev)
{
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
@@ -1265,162 +1403,437 @@ static void update_gids_task(struct work_struct *work)
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
- else {
- memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+ else
mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ kfree(gw);
+}
+
+static void reset_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw =
+ container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("reset gid table failed\n");
+ goto free;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof(gw->gids));
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ pr_warn("set port %d command failed\n", gw->port);
}
mlx4_free_cmd_mailbox(dev, mailbox);
+free:
kfree(gw);
}
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+ union ib_gid *gid, int clear,
+ int default_gid)
{
- struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
- struct net_device *tmp;
int i;
- u8 *hits;
- int ret;
- union ib_gid gid;
- int free;
- int found;
int need_update = 0;
- u16 vid;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- hits = kzalloc(128, GFP_ATOMIC);
- if (!hits) {
- ret = -ENOMEM;
- goto out;
- }
+ int free = -1;
+ int found = -1;
+ int max_gids;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
- if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
- gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- vid = rdma_vlan_dev_vlan_id(tmp);
- mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
- found = 0;
- free = -1;
- for (i = 0; i < 128; ++i) {
- if (free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- free = i;
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
- hits[i] = 1;
- found = 1;
+ if (default_gid) {
+ free = 0;
+ } else {
+ max_gids = dev->dev->caps.gid_table_len[port];
+ for (i = 1; i < max_gids; ++i) {
+ if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+ sizeof(*gid)))
+ found = i;
+
+ if (clear) {
+ if (found >= 0) {
+ need_update = 1;
+ dev->iboe.gid_table[port - 1][found] =
+ zgid;
break;
}
- }
+ } else {
+ if (found >= 0)
+ break;
- if (!found) {
- if (tmp == ndev &&
- (memcmp(&dev->iboe.gid_table[port - 1][0],
- &gid, sizeof gid) ||
- !memcmp(&dev->iboe.gid_table[port - 1][0],
- &zgid, sizeof gid))) {
- dev->iboe.gid_table[port - 1][0] = gid;
- ++need_update;
- hits[0] = 1;
- } else if (free >= 0) {
- dev->iboe.gid_table[port - 1][free] = gid;
- hits[free] = 1;
- ++need_update;
- }
+ if (free < 0 &&
+ !memcmp(&dev->iboe.gid_table[port - 1][i],
+ &zgid, sizeof(*gid)))
+ free = i;
}
}
}
- rcu_read_unlock();
- for (i = 0; i < 128; ++i)
- if (!hits[i]) {
- if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- ++need_update;
- dev->iboe.gid_table[port - 1][i] = zgid;
- }
+ if (found == -1 && !clear && free >= 0) {
+ dev->iboe.gid_table[port - 1][free] = *gid;
+ need_update = 1;
+ }
- if (need_update) {
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
- INIT_WORK(&work->work, update_gids_task);
- work->port = port;
- work->dev = dev;
- queue_work(wq, &work->work);
- } else
- kfree(work);
+ if (!need_update)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+ INIT_WORK(&work->work, update_gids_task);
+ work->port = port;
+ work->dev = dev;
+ queue_work(wq, &work->work);
- kfree(hits);
return 0;
+}
-out:
- kfree(work);
- return ret;
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
}
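As a rough userspace sketch of the GID this produces, assuming mlx4_addrconf_ifid_eui48() follows the usual addrconf convention (0xff/0xfe filler bytes for the 0xffff vlan id, universal/local bit flipped):

#include <stdint.h>
#include <string.h>

/* Sketch only: fe80::/64 prefix plus a MAC-derived interface ID. */
static void example_default_gid(const uint8_t mac[6], uint8_t gid[16])
{
	memset(gid, 0, 16);
	gid[0] = 0xfe;			/* fe80::/64 link-local prefix */
	gid[1] = 0x80;
	memcpy(gid + 8, mac, 3);	/* upper MAC bytes */
	gid[11] = 0xff;			/* filler used when vlan_id is 0xffff */
	gid[12] = 0xfe;
	memcpy(gid + 13, mac + 3, 3);	/* lower MAC bytes */
	gid[8] ^= 2;			/* flip the universal/local bit */
}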
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+
+static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
{
- switch (event) {
- case NETDEV_UP:
- case NETDEV_CHANGEADDR:
- update_ipv6_gids(dev, port, 0);
- break;
+ struct update_gid_work *work;
- case NETDEV_DOWN:
- update_ipv6_gids(dev, port, 1);
- dev->iboe.netdevs[port - 1] = NULL;
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
+ memset(work->gids, 0, sizeof(work->gids));
+ INIT_WORK(&work->work, reset_gids_task);
+ work->dev = dev;
+ work->port = port;
+ queue_work(wq, &work->work);
+ return 0;
}
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+ struct mlx4_ib_dev *ibdev, union ib_gid *gid)
{
- update_ipv6_gids(dev, port, 0);
+ struct mlx4_ib_iboe *iboe;
+ int port = 0;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+ rdma_vlan_dev_real_dev(event_netdev) :
+ event_netdev;
+ union ib_gid default_gid;
+
+ mlx4_make_default_gid(real_dev, &default_gid);
+
+ if (!memcmp(gid, &default_gid, sizeof(*gid)))
+ return 0;
+
+ if (event != NETDEV_DOWN && event != NETDEV_UP)
+ return 0;
+
+ if ((real_dev != event_netdev) &&
+ (event == NETDEV_DOWN) &&
+ rdma_link_local_addr((struct in6_addr *)gid))
+ return 0;
+
+ iboe = &ibdev->iboe;
+ spin_lock(&iboe->lock);
+
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ update_gid_table(ibdev, port, gid,
+ event == NETDEV_DOWN, 0);
+
+ spin_unlock(&iboe->lock);
+ return 0;
}
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev)
{
- update_ipv6_gids(dev, port, 1);
+ u8 port = 0;
+ struct mlx4_ib_iboe *iboe;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+ rdma_vlan_dev_real_dev(dev) : dev;
+
+ iboe = &ibdev->iboe;
+
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ break;
+
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return 0;
+
+ return port;
}
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct mlx4_ib_dev *ibdev;
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *event_netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
+ return NOTIFY_DONE;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlx4_ib_dev *ibdev;
- struct net_device *oldnd;
+ struct inet6_ifaddr *ifa = ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *event_netdev = ifa->idev->dev;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+ return NOTIFY_DONE;
+}
+#endif
+
+#define MLX4_IB_INVALID_MAC ((u64)-1)
+static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ int port)
+{
+ u64 new_smac = 0;
+ u64 release_mac = MLX4_IB_INVALID_MAC;
+ struct mlx4_ib_qp *qp;
+
+ read_lock(&dev_base_lock);
+ new_smac = mlx4_mac_to_u64(dev->dev_addr);
+ read_unlock(&dev_base_lock);
+
+ mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
+ qp = ibdev->qp1_proxy[port - 1];
+ if (qp) {
+ int new_smac_index;
+ u64 old_smac = qp->pri.smac;
+ struct mlx4_update_qp_params update_params;
+
+ if (new_smac == old_smac)
+ goto unlock;
+
+ new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
+
+ if (new_smac_index < 0)
+ goto unlock;
+
+ update_params.smac_index = new_smac_index;
+ if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC,
+ &update_params)) {
+ release_mac = new_smac;
+ goto unlock;
+ }
+
+ qp->pri.smac = new_smac;
+ qp->pri.smac_index = new_smac_index;
+
+ release_mac = old_smac;
+ }
+
+unlock:
+ mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
+ if (release_mac != MLX4_IB_INVALID_MAC)
+ mlx4_unregister_mac(ibdev->dev, port, release_mac);
+}
+
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev, u8 port)
+{
+ struct in_device *in_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+#endif
+ union ib_gid gid;
+
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return;
+
+ /* IPv4 gids */
+ in_dev = in_dev_get(dev);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ update_gid_table(ibdev, port, &gid, 0, 0);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ /* IPv6 gids */
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ update_gid_table(ibdev, port, pgid, 0, 0);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev, u8 port)
+{
+ union ib_gid gid;
+ mlx4_make_default_gid(dev, &gid);
+ update_gid_table(ibdev, port, &gid, 0, 1);
+}
+
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+ struct net_device *dev;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ int i;
+
+ for (i = 1; i <= ibdev->num_ports; ++i)
+ if (reset_gid_table(ibdev, i))
+ return -1;
+
+ read_lock(&dev_base_lock);
+ spin_lock(&iboe->lock);
+
+ for_each_netdev(&init_net, dev) {
+ u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+ if (port)
+ mlx4_ib_get_dev_addr(dev, ibdev, port);
+ }
+
+ spin_unlock(&iboe->lock);
+ read_unlock(&dev_base_lock);
+
+ return 0;
+}
+
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
+{
struct mlx4_ib_iboe *iboe;
+ int update_qps_port = -1;
int port;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- oldnd = iboe->netdevs[port - 1];
+ enum ib_port_state port_state = IB_PORT_NOP;
+ struct net_device *old_master = iboe->masters[port - 1];
+ struct net_device *curr_netdev;
+ struct net_device *curr_master;
+
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (oldnd != iboe->netdevs[port - 1]) {
- if (iboe->netdevs[port - 1])
- netdev_added(ibdev, port);
- else
- netdev_removed(ibdev, port);
+ if (iboe->netdevs[port - 1])
+ mlx4_ib_set_default_gid(ibdev,
+ iboe->netdevs[port - 1], port);
+ curr_netdev = iboe->netdevs[port - 1];
+
+ if (iboe->netdevs[port - 1] &&
+ netif_is_bond_slave(iboe->netdevs[port - 1])) {
+ iboe->masters[port - 1] = netdev_master_upper_dev_get(
+ iboe->netdevs[port - 1]);
+ } else {
+ iboe->masters[port - 1] = NULL;
+ }
+ curr_master = iboe->masters[port - 1];
+
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
+ event == NETDEV_UP || event == NETDEV_CHANGE))
+ update_qps_port = port;
+
+ if (curr_netdev) {
+ port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ } else {
+ reset_gid_table(ibdev, port);
+ }
+ /* if bonding/teaming is used and a slave port is down, we don't want
+ * the bond IP based gids in the table, since flows that select a port
+ * by gid may get the down port.
+ */
+ if (curr_master && (port_state == IB_PORT_DOWN)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ }
+ /* if bonding is used, it is possible that we add the netdev to
+ * masters only after an IP address is assigned to the bonding
+ * interface.
+ */
+ if (curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_master, ibdev, port);
}
- }
- if (dev == iboe->netdevs[0] ||
- (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
- handle_en_event(ibdev, 1, event);
- else if (dev == iboe->netdevs[1]
- || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
- handle_en_event(ibdev, 2, event);
+ if (!curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
+ }
+ }
spin_unlock(&iboe->lock);
+ if (update_qps_port > 0)
+ mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlx4_ib_dev *ibdev;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+ mlx4_ib_scan_netdevs(ibdev, dev, event);
+
return NOTIFY_DONE;
}
@@ -1458,7 +1871,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
- char name[32];
+ char name[80];
int eq_per_port = 0;
int added_eqs = 0;
int total_eqs = 0;
@@ -1488,8 +1901,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
eq = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
- sprintf(name, "mlx4-ib-%d-%d@%s",
- i, j, dev->pdev->bus->name);
+ snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
+ i, j, dev->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
@@ -1539,17 +1952,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int i, j;
int err;
struct mlx4_ib_iboe *iboe;
+ int ib_num_ports = 0;
pr_info_once("%s", mlx4_ib_version);
- mlx4_foreach_non_ib_transport_port(i, dev)
- num_ports++;
-
- if (mlx4_is_mfunc(dev) && num_ports) {
- dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
- return NULL;
- }
-
num_ports = 0;
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@@ -1688,12 +2094,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
if (check_flow_steering_support(dev)) {
+ ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
}
mlx4_ib_alloc_eqs(dev, ibdev);
@@ -1704,20 +2111,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_map;
for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->qp1_proxy_lock[i]);
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
if (err)
ibdev->counters[i] = -1;
- } else
- ibdev->counters[i] = -1;
+ } else {
+ ibdev->counters[i] = -1;
+ }
}
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ ib_num_ports) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN,
+ &ibdev->steer_qpn_base);
+ if (err)
+ goto err_counter;
+
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bitmap alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
if (ib_register_device(&ibdev->ib_dev, NULL))
- goto err_counter;
+ goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@@ -1725,11 +2165,39 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err)
- goto err_sriov;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+ if (!iboe->nb_inet.notifier_call) {
+ iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+ err = register_inetaddr_notifier(&iboe->nb_inet);
+ if (err) {
+ iboe->nb_inet.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!iboe->nb_inet6.notifier_call) {
+ iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+ err = register_inet6addr_notifier(&iboe->nb_inet6);
+ if (err) {
+ iboe->nb_inet6.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#endif
+ for (i = 1 ; i <= ibdev->num_ports ; ++i)
+ reset_gid_table(ibdev, i);
+ rtnl_lock();
+ mlx4_ib_scan_netdevs(ibdev, NULL, 0);
+ rtnl_unlock();
+ mlx4_ib_init_gid_table(ibdev);
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1755,11 +2223,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
return ibdev;
err_notif:
- if (unregister_netdevice_notifier(&ibdev->iboe.nb))
- pr_warn("failure unregistering notifier\n");
+ if (ibdev->iboe.nb.notifier_call) {
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
flush_workqueue(wq);
-err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
@@ -1768,6 +2250,13 @@ err_mad:
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
for (; i; --i)
if (ibdev->counters[i - 1] != -1)
@@ -1788,6 +2277,69 @@ err_dealloc:
return NULL;
}
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+ int offset;
+
+ WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+ offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+ dev->steer_qpn_count,
+ get_count_order(count));
+ if (offset < 0)
+ return offset;
+
+ *qpn = dev->steer_qpn_base + offset;
+ return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+ if (!qpn ||
+ dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return;
+
+ BUG_ON(qpn < dev->steer_qpn_base);
+
+ bitmap_release_region(dev->ib_uc_qpns_bitmap,
+ qpn - dev->steer_qpn_base,
+ get_count_order(count));
+}
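A short usage sketch for the allocator pair above; bitmap_find_free_region() hands out power-of-two regions, so counts are rounded up via get_count_order():

/* Sketch only: reserve and release a block of UC steering QPNs. */
static void example_steer_qpn_usage(struct mlx4_ib_dev *ibdev)
{
	int qpn;

	/* get_count_order(3) == 2, so four consecutive QPNs are reserved */
	if (mlx4_ib_steer_qp_alloc(ibdev, 3, &qpn))
		return;
	/* ... attach steering rules that target qpn ... */
	mlx4_ib_steer_qp_free(ibdev, qpn, 3);	/* must pass the same count */
}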
+
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach)
+{
+ int err;
+ size_t flow_size;
+ struct ib_flow_attr *flow = NULL;
+ struct ib_flow_spec_ib *ib_spec;
+
+ if (is_attach) {
+ flow_size = sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_ib);
+ flow = kzalloc(flow_size, GFP_KERNEL);
+ if (!flow)
+ return -ENOMEM;
+ flow->port = mqp->port;
+ flow->num_of_specs = 1;
+ flow->size = flow_size;
+ ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
+ ib_spec->type = IB_FLOW_SPEC_IB;
+ ib_spec->size = sizeof(struct ib_flow_spec_ib);
+ /* Add an empty rule for IB L2 */
+ memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
+
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
+ IB_FLOW_DOMAIN_NIC,
+ MLX4_FS_REGULAR,
+ &mqp->reg_id);
+ } else {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ }
+ kfree(flow);
+ return err;
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
@@ -1801,6 +2353,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
+
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
+
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
if (ibdev->counters[p] != -1)
@@ -1821,17 +2393,24 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
+ struct mlx4_active_ports actv_ports;
+ unsigned int ports;
+ unsigned int first_port;
if (!mlx4_is_master(dev))
return;
- dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+
+ dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
goto out;
}
- for (i = 0; i < dev->caps.num_ports; i++) {
+ for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
@@ -1843,9 +2422,9 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
}
}
/* initialize or tear down tunnel QPs for the slave */
- for (i = 0; i < dev->caps.num_ports; i++) {
+ for (i = 0; i < ports; i++) {
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
- dm[i]->port = i + 1;
+ dm[i]->port = first_port + i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 25b2cdff00f..ed327e6c8fd 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
}
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
- return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
- IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
+ return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
+ ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
+ &ah_attr, NULL, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 036b663dd26..369da3ca5d6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -68,6 +68,8 @@ enum {
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS 256
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@@ -153,6 +155,8 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -238,6 +242,22 @@ struct mlx4_ib_proxy_sqp_hdr {
struct mlx4_rcv_tunnel_hdr tun;
} __packed;
+struct mlx4_roce_smac_vlan_info {
+ u64 smac;
+ int smac_index;
+ int smac_port;
+ u64 candidate_smac;
+ int candidate_smac_index;
+ int candidate_smac_port;
+ u16 vid;
+ int vlan_index;
+ int vlan_port;
+ u16 candidate_vid;
+ int candidate_vlan_index;
+ int candidate_vlan_port;
+ int update_vid;
+};
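The candidate_* fields enable a two-phase update: a new smac/vlan is registered as a candidate before the modify-qp firmware command, then committed or rolled back once the command completes. A compressed sketch of the smac commit step (the full logic appears at the end of __mlx4_ib_modify_qp() below):

/* Sketch only: resolving a candidate smac after modify-qp returns. */
static void example_commit_smac(struct mlx4_dev *dev,
				struct mlx4_roce_smac_vlan_info *info, int err)
{
	if (err) {			/* roll back: drop the candidate */
		mlx4_unregister_mac(dev, info->candidate_smac_port,
				    info->candidate_smac);
	} else {			/* commit: retire old, promote new */
		if (info->smac)
			mlx4_unregister_mac(dev, info->smac_port, info->smac);
		info->smac = info->candidate_smac;
		info->smac_index = info->candidate_smac_index;
		info->smac_port = info->candidate_smac_port;
	}
	info->candidate_smac = 0;
}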
+
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@@ -270,7 +290,9 @@ struct mlx4_ib_qp {
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
-
+ struct mlx4_roce_smac_vlan_info pri;
+ struct mlx4_roce_smac_vlan_info alt;
+ u64 reg_id;
};
struct mlx4_ib_srq {
@@ -428,7 +450,10 @@ struct mlx4_ib_sriov {
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
+ struct net_device *masters[MLX4_MAX_PORTS];
struct notifier_block nb;
+ struct notifier_block nb_inet;
+ struct notifier_block nb_inet6;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
@@ -494,6 +519,13 @@ struct mlx4_ib_dev {
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
struct pkey_mgt pkeys;
+ unsigned long *ib_uc_qpns_bitmap;
+ int steer_qpn_count;
+ int steer_qpn_base;
+ int steering_support;
+ struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
+ /* serializes qp1_proxy destruction against netdev event handling */
+ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
};
struct ib_event_work {
@@ -675,9 +707,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
- u8 *mac, int *is_mcast, u8 port);
-
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@@ -712,9 +741,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
+
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
+ u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ struct ib_mad *mad);
+
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
@@ -752,5 +784,9 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
__be64 mlx4_ib_gen_node_guid(void);
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index e471f089ff0..cb2a8727f3f 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
- struct ib_umem_chunk *chunk;
- int i, j, k;
+ int i, k, entry;
int n;
int len;
int err = 0;
+ struct scatterlist *sg;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
@@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
i = n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ umem->page_size * k;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
+ if (i == PAGE_SIZE / sizeof (u64)) {
+ err = mlx4_write_mtt(dev->dev, mtt, n,
+ i, pages);
+ if (err)
+ goto out;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4f10af2905b..67780452f0c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -90,6 +90,21 @@ enum {
MLX4_RAW_QP_MSGMAX = 31,
};
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+static inline u64 mlx4_mac_to_u64(u8 *addr)
+{
+ u64 mac = 0;
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac <<= 8;
+ mac |= addr[i];
+ }
+ return mac;
+}
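For example, 00:11:22:33:44:55 packs to 0x001122334455, the byte order that MAC-table helpers such as mlx4_register_mac() expect; a minimal sanity check:

/* Sketch only: big-endian-style packing of a MAC into a u64. */
static void example_mac_to_u64(void)
{
	u8 addr[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	WARN_ON(mlx4_mac_to_u64(addr) != 0x001122334455ULL);
}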
+
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
@@ -593,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return !attr->srq;
}
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i])
+ return !!dev->caps.qp0_qkey[i];
+ }
+ return 0;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+ gfp_t gfp)
{
int qpn;
int err;
@@ -610,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
!(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
if (init_attr->qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_PROXY_GSI;
- else if (mlx4_is_master(dev->dev))
- qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
- else
- qp_type = MLX4_IB_QPT_PROXY_SMI;
+ else {
+ if (mlx4_is_master(dev->dev) ||
+ qp0_enabled_vf(dev->dev, sqpn))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
}
qpn = sqpn;
/* add extra sg entry for tunneling */
@@ -628,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return -EINVAL;
if (tnl_init->proxy_qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_TUN_GSI;
- else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+ mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+ tnl_init->port))
qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
else
qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -643,14 +674,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
(qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
} else {
- qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
if (!qp)
return -ENOMEM;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
}
} else
qp = *caller_qp;
@@ -716,19 +751,27 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (dev->steering_support ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ qp->flags |= MLX4_IB_QP_NETIF;
+ else
+ goto err;
+ }
+
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
if (qp_has_rq(init_attr)) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
if (err)
goto err;
*qp->db.db = 0;
}
- if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
err = -ENOMEM;
goto err_db;
}
@@ -738,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -765,12 +807,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->qp_type == IB_QPT_RAW_PACKET)
err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
else
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1,
+ &qpn);
if (err)
goto err_proxy;
}
- err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
goto err_qpn;
@@ -790,8 +836,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return 0;
err_qpn:
- if (!sqpn)
- mlx4_qp_release_range(dev->dev, qpn, 1);
+ if (!sqpn) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ }
err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
@@ -909,11 +959,32 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
{
struct mlx4_ib_cq *send_cq, *recv_cq;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+ }
get_cqs(qp, &send_cq, &recv_cq);
@@ -932,8 +1003,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
- if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
- mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ }
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@@ -980,19 +1055,30 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct mlx4_ib_qp *qp = NULL;
int err;
u16 xrcdn = 0;
+ gfp_t gfp;
+ gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+ GFP_NOIO : GFP_KERNEL;
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
*/
if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
- MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
+ MLX4_IB_SRIOV_TUNNEL_QP |
+ MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (init_attr->qp_type != IB_QPT_UD)
+ return ERR_PTR(-EINVAL);
+ }
+
if (init_attr->create_flags &&
(udata ||
- ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+ ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
init_attr->qp_type != IB_QPT_UD) ||
((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
@@ -1012,14 +1098,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ qp = kzalloc(sizeof *qp, gfp);
if (!qp)
return ERR_PTR(-ENOMEM);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
/* fall through */
case IB_QPT_UD:
{
err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp);
+ udata, 0, &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1037,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
get_sqp_num(to_mdev(pd->device), init_attr),
- &qp);
+ &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1063,6 +1151,12 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
+ if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+ mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ dev->qp1_proxy[mqp->port - 1] = NULL;
+ mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ }
+
pd = get_pd(mqp);
destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
@@ -1144,16 +1238,16 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_qp_path *path, u8 port)
+static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+ u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
+ struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
- int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
- u8 mac[6];
- int is_mcast;
- u16 vlan_tag;
int vidx;
+ int smac_index;
+ int err;
+
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
@@ -1182,36 +1276,105 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
if (is_eth) {
- path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 7) << 3);
-
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
- err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
- if (err)
- return err;
-
- memcpy(path->dmac, mac, 6);
- path->ackto = MLX4_IB_LINK_TYPE_ETH;
- /* use index 0 into MAC table for IBoE */
- path->grh_mylmc &= 0x80;
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 7) << 3);
- vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+ path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
- if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
- return -ENOENT;
-
- path->vlan_index = vidx;
+ if (smac_info->vid < 0x1000) {
+ /* both valid vlan ids */
+ if (smac_info->vid != vlan_tag) {
+ /* different VIDs. unreg old and reg new */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ } else {
+ path->vlan_index = smac_info->vlan_index;
+ }
+ } else {
+ /* no current vlan tag in qp */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ }
+ path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
path->fl = 1 << 6;
+ } else {
+ /* have current vlan tag. unregister it at modify-qp success */
+ if (smac_info->vid < 0x1000) {
+ smac_info->candidate_vid = 0xFFFF;
+ smac_info->update_vid = 1;
+ }
}
- } else
+
+ /* get smac_index for RoCE use.
+ * If no smac was yet assigned, register one.
+ * If one was already assigned, but the new mac differs,
+ * unregister the old one and register the new one.
+ */
+ if (!smac_info->smac || smac_info->smac != smac) {
+ /* register candidate now, unreg if needed, after success */
+ smac_index = mlx4_register_mac(dev->dev, port, smac);
+ if (smac_index >= 0) {
+ smac_info->candidate_smac_index = smac_index;
+ smac_info->candidate_smac = smac;
+ smac_info->candidate_smac_port = port;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ smac_index = smac_info->smac_index;
+ }
+
+ memcpy(path->dmac, ah->dmac, 6);
+ path->ackto = MLX4_IB_LINK_TYPE_ETH;
+ /* put MAC table smac index for IBoE */
+ path->grh_mylmc = (u8) (smac_index) | 0x80;
+ } else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ }
return 0;
}
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->smac),
+ (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+ path, &mqp->pri, port);
+}
+
+static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
+ const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->alt_ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->alt_smac),
+ (qp_attr_mask & IB_QP_ALT_VID) ?
+ qp->alt_vlan_id : 0xffff,
+ path, &mqp->alt, port);
+}
+
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
struct mlx4_ib_gid_entry *ge, *tmp;
@@ -1224,6 +1387,37 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+ struct mlx4_qp_context *context)
+{
+ struct net_device *ndev;
+ u64 u64_mac;
+ int smac_index;
+
+ ndev = dev->iboe.netdevs[qp->port - 1];
+ if (ndev) {
+ smac = ndev->dev_addr;
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else {
+ u64_mac = dev->dev->caps.def_mac[qp->port];
+ }
+
+ context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
+ if (!qp->pri.smac) {
+ smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
+ if (smac_index >= 0) {
+ qp->pri.candidate_smac_index = smac_index;
+ qp->pri.candidate_smac = u64_mac;
+ qp->pri.candidate_smac_port = qp->port;
+ context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
+ } else {
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1235,6 +1429,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
+ int steer_qp = 0;
int err = -EINVAL;
context = kzalloc(sizeof *context, GFP_KERNEL);
@@ -1319,6 +1514,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
} else
context->pri_path.counter_index = 0xff;
+
+ if (qp->flags & MLX4_IB_QP_NETIF) {
+ mlx4_ib_steer_qp_reg(dev, qp, 1);
+ steer_qp = 1;
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1329,7 +1529,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+ if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port))
goto out;
@@ -1352,8 +1552,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
- attr->alt_port_num))
+ if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
+ &context->alt_path,
+ attr->alt_port_num))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
@@ -1458,12 +1659,39 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
+ if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+ context->pri_path.feup = 1 << 7; /* don't fsm */
+ /* handle smac_index */
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
+ err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+ if (err) {
+ err = -EINVAL;
+ goto out;
+ }
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ dev->qp1_proxy[qp->port - 1] = qp;
+ }
+ }
}
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
+ if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ int is_eth = rdma_port_get_link_layer(
+ &dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth) {
+ context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
+ optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
+ }
+ }
+
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
@@ -1534,23 +1762,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq): NULL);
- if (send_cq != recv_cq)
- mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ if (new_state == IB_QPS_RESET) {
+ if (!ibqp->uobject) {
+ mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (qp->rq.wqe_cnt)
+ *qp->db.db = 0;
+
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
+ }
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
- if (qp->rq.wqe_cnt)
- *qp->db.db = 0;
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
}
-
out:
+ if (err && steer_qp)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
+ if (qp->pri.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
+ } else {
+ if (qp->pri.smac)
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = qp->pri.candidate_smac;
+ qp->pri.smac_index = qp->pri.candidate_smac_index;
+ qp->pri.smac_port = qp->pri.candidate_smac_port;
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+ if (qp->alt.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
+ } else {
+ if (qp->alt.smac)
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = qp->alt.candidate_smac;
+ qp->alt.smac_index = qp->alt.candidate_smac_index;
+ qp->alt.smac_port = qp->alt.candidate_smac_port;
+ }
+ qp->alt.candidate_smac = 0;
+ qp->alt.candidate_smac_index = 0;
+ qp->alt.candidate_smac_port = 0;
+ }
+
+ if (qp->pri.update_vid) {
+ if (err) {
+ if (qp->pri.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
+ qp->pri.candidate_vid);
+ } else {
+ if (qp->pri.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
+ qp->pri.vid);
+ qp->pri.vid = qp->pri.candidate_vid;
+ qp->pri.vlan_port = qp->pri.candidate_vlan_port;
+ qp->pri.vlan_index = qp->pri.candidate_vlan_index;
+ }
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.update_vid) {
+ if (err) {
+ if (qp->alt.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
+ qp->alt.candidate_vid);
+ } else {
+ if (qp->alt.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
+ qp->alt.vid);
+ qp->alt.vid = qp->alt.candidate_vid;
+ qp->alt.vlan_port = qp->alt.candidate_vlan_port;
+ qp->alt.vlan_index = qp->alt.candidate_vlan_index;
+ }
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+
return err;
}
@@ -1561,13 +1879,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
-
+ int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ ll = IB_LINK_LAYER_UNSPECIFIED;
+ } else {
+ int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ }
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, ll)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@@ -1631,6 +1957,19 @@ out:
return err;
}
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i] ||
+ qpn == dev->caps.qp0_tunnel[i]) {
+ *qkey = dev->caps.qp0_qkey[i];
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
@@ -1688,8 +2027,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
- return -EINVAL;
+ if (mlx4_is_master(mdev->dev)) {
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ } else {
+ if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ }
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
@@ -1744,9 +2088,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
- struct net_device *ndev;
union ib_gid sgid;
u16 pkey;
int send_size;
@@ -1770,12 +2114,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
* we must use our own cache */
- sgid.global.subnet_prefix =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- subnet_prefix;
- sgid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid.raw[0]);
+ if (err)
+ return err;
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
@@ -1784,8 +2127,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return err;
}
- vlan = rdma_get_vlan_id(&sgid);
- is_vlan = vlan < 0x1000;
+ if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
+ vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
+ is_vlan = 1;
+ }
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
@@ -1802,6 +2147,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
+ if (is_eth)
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+ else {
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -1817,6 +2165,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
&sqp->ud_header.grh.source_gid);
+ }
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
@@ -1849,16 +2198,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (is_eth) {
u8 *smac;
+ struct in6_addr in6;
+
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
mlx->sched_prio = cpu_to_be16(pcp);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
/* FIXME: cache smac value? */
- ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
- if (!ndev)
- return -ENODEV;
- smac = ndev->dev_addr;
+ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+ memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+ memcpy(&in6, sgid.raw, sizeof(in6));
+
+ if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
+ smac = to_mdev(sqp->qp.ibqp.device)->
+ iboe.netdevs[sqp->qp.port - 1]->dev_addr;
+ else /* use the src mac of the tunnel */
+ smac = ah->av.eth.s_mac;
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
@@ -2059,7 +2415,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr, enum ib_qp_type qpt)
+ struct ib_send_wr *wr,
+ enum mlx4_ib_qp_type qpt)
{
union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
struct mlx4_av sqp_av = {0};
@@ -2072,8 +2429,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
cpu_to_be32(0xf0000000);
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
- /* This function used only for sending on QP1 proxies */
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ if (qpt == MLX4_IB_QPT_PROXY_GSI)
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ else
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
@@ -2090,6 +2449,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(hdr.mac, ah->av.eth.mac, 6);
+ hdr.vlan = ah->av.eth.vlan;
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
@@ -2366,11 +2727,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
- }
err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
@@ -2387,16 +2743,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += seglen / 16;
break;
case MLX4_IB_QPT_PROXY_SMI:
- /* don't allow QP0 sends on guests */
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
- set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
build_tunnel_header(wr, wqe, &seglen);
@@ -2762,6 +3115,9 @@ done:
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
+
qp_init_attr->sq_sig_type =
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb025fc..62d9285300a 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+ GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 97516eb363b..cb4c66e723b 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -389,8 +389,10 @@ struct mlx4_port {
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
- u8 port_num;
+ struct device_attribute enable_smi_admin;
+ struct device_attribute smi_enabled;
int slave;
+ u8 port_num;
};
@@ -558,6 +560,101 @@ err:
return NULL;
}
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, smi_enabled);
+ ssize_t len = 0;
+
+ if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ ssize_t len = 0;
+
+ if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ int enable;
+
+ if (sscanf(buf, "%i", &enable) != 1 || enable < 0 || enable > 1)
+ return -EINVAL;
+
+ if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+ return -EINVAL;
+ return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+ int ret;
+
+ /* do not display entries if eth transport, or if master */
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return 0;
+
+ sysfs_attr_init(&p->smi_enabled.attr);
+ p->smi_enabled.show = sysfs_show_smi_enabled;
+ p->smi_enabled.store = NULL;
+ p->smi_enabled.attr.name = "smi_enabled";
+ p->smi_enabled.attr.mode = 0444;
+ ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+ if (ret) {
+ pr_err("failed to create smi_enabled\n");
+ return ret;
+ }
+
+ sysfs_attr_init(&p->enable_smi_admin.attr);
+ p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+ p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+ p->enable_smi_admin.attr.name = "enable_smi_admin";
+ p->enable_smi_admin.attr.mode = 0644;
+ ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+ if (ret) {
+ pr_err("failed to create enable_smi_admin\n");
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ return ret;
+ }
+ return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return;
+
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
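/*
 * Userspace sketch (illustration only; the exact sysfs path below is an
 * assumption -- these files hang off the per-slave port kobject that
 * add_port() creates, alongside the existing pkey/gid_idx groups).
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/.../ports/1/enable_smi_admin";	/* hypothetical */
	FILE *f = fopen(path, "w");

	if (!f)
		return 1;
	fputs("1\n", f);	/* admin-enable SMI (QP0) for this VF port */
	return fclose(f) ? 1 : 0;
}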
+
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
@@ -582,8 +679,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
- if (!p->pkey_group.attrs)
+ if (!p->pkey_group.attrs) {
+ ret = -ENOMEM;
goto err_alloc;
+ }
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
@@ -591,13 +690,19 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
- if (!p->gid_group.attrs)
+ if (!p->gid_group.attrs) {
+ ret = -ENOMEM;
goto err_free_pkey;
+ }
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
+ ret = add_vf_smi_entries(p);
+ if (ret)
+ goto err_free_gid;
+
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
@@ -623,6 +728,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
int port;
struct kobject *p, *t;
struct mlx4_port *mport;
+ struct mlx4_active_ports actv_ports;
get_name(dev, name, slave, sizeof name);
@@ -645,7 +751,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
goto err_ports;
}
+ actv_ports = mlx4_get_active_ports(dev->dev, slave);
+
for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
err = add_port(dev, port, slave);
if (err)
goto err_add;
@@ -660,6 +770,7 @@ err_add:
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
+ remove_vf_smi_entries(mport);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
@@ -704,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ remove_vf_smi_entries(port);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 8e6aebfaf8a..10df386c634 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,6 +1,6 @@
config MLX5_INFINIBAND
tristate "Mellanox Connect-IB HCA support"
- depends on NETDEVICES && ETHERNET && PCI && X86
+ depends on NETDEVICES && ETHERNET && PCI
select NET_VENDOR_MELLANOX
select MLX5_CORE
---help---
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03948a..8ae4f896cb4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -73,14 +74,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}
+static u8 sw_ownership_bit(int n, int nent)
+{
+ return (n & nent) ? 1 : 0;
+}
+
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
struct mlx5_cqe64 *cqe64;
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
- !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+
+ if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
+ return cqe;
+ } else {
+ return NULL;
+ }
}
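/*
 * Sketch, not part of the patch: with nent a power of two, "n & nent" flips
 * once per wrap of a free-running index, matching the alternating hardware
 * ownership bit that get_sw_cqe() tests via (cq->ibcq.cqe + 1).
 */
#include <stdio.h>

static unsigned char sw_own(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

int main(void)
{
	for (int n = 0; n < 12; n++)	/* prints 0 for 0..3, 1 for 4..7, 0 for 8..11 */
		printf("%d", sw_own(n, 4));
	printf("\n");
	return 0;
}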
static void *next_cqe_sw(struct mlx5_ib_cq *cq)
@@ -351,6 +362,43 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
qp->sq.last_poll = tail;
}
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+ mlx5_buf_free(&dev->mdev, &buf->buf);
+}
+
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+ struct ib_sig_err *item)
+{
+ u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR (1 << 13)
+#define APPTAG_ERR (1 << 12)
+#define REFTAG_ERR (1 << 11)
+
+ if (syndrome & GUARD_ERR) {
+ item->err_type = IB_SIG_BAD_GUARD;
+ item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+ item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+ } else
+ if (syndrome & REFTAG_ERR) {
+ item->err_type = IB_SIG_BAD_REFTAG;
+ item->expected = be32_to_cpu(cqe->expected_reftag);
+ item->actual = be32_to_cpu(cqe->actual_reftag);
+ } else
+ if (syndrome & APPTAG_ERR) {
+ item->err_type = IB_SIG_BAD_APPTAG;
+ item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+ item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+ } else {
+ pr_err("Got signature completion error with bad syndrome %04x\n",
+ syndrome);
+ }
+
+ item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+ item->key = be32_to_cpu(cqe->mkey);
+}
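/*
 * Worked example with hypothetical values: a syndrome of 0x2000 has bit 13
 * (GUARD_ERR) set, so the 16-bit guard tags come from the upper halves of
 * the transport signatures; REFTAG outranks APPTAG only when the guard bit
 * is clear, mirroring the if/else ladder above.
 */
#include <stdio.h>

int main(void)
{
	unsigned short syndrome = 0x2000;		/* GUARD_ERR */
	unsigned int expected_trans_sig = 0xabcd0001;	/* hypothetical CQE field */

	if (syndrome & (1 << 13))
		printf("bad guard, expected 0x%04x\n", expected_trans_sig >> 16);
	return 0;
}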
+
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -360,12 +408,16 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
+ struct mlx5_sig_err_cqe *sig_err_cqe;
+ struct mlx5_core_mr *mmr;
+ struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
void *cqe;
int idx;
+repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
@@ -379,7 +431,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
*/
rmb();
- /* TBD: resize CQ */
+ opcode = cqe64->op_own >> 4;
+ if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
+ if (likely(cq->resize_buf)) {
+ free_cq_buf(dev, &cq->buf);
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ goto repoll;
+ } else {
+ mlx5_ib_warn(dev, "unexpected resize cqe\n");
+ }
+ }
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
@@ -398,7 +461,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
wc->qp = &(*cur_qp)->ibqp;
- opcode = cqe64->op_own >> 4;
switch (opcode) {
case MLX5_CQE_REQ:
wq = &(*cur_qp)->sq;
@@ -449,6 +511,33 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
}
break;
+ case MLX5_CQE_SIG_ERR:
+ sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+ read_lock(&dev->mdev.priv.mr_table.lock);
+ mmr = __mlx5_mr_lookup(&dev->mdev,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ if (unlikely(!mmr)) {
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+ cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+ return -EINVAL;
+ }
+
+ mr = to_mibmr(mmr);
+ get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+ mr->sig->sig_err_exists = true;
+ mr->sig->sigerr_count++;
+
+ mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+ cq->mcq.cqn, mr->sig->err_item.key,
+ mr->sig->err_item.err_type,
+ mr->sig->err_item.sig_err_offset,
+ mr->sig->err_item.expected,
+ mr->sig->err_item.actual);
+
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ goto repoll;
}
return 0;
@@ -503,29 +592,35 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
return err;
buf->cqe_size = cqe_size;
+ buf->nent = nent;
return 0;
}
-static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
-{
- mlx5_buf_free(&dev->mdev, &buf->buf);
-}
-
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct ib_ucontext *context, struct mlx5_ib_cq *cq,
int entries, struct mlx5_create_cq_mbox_in **cqb,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd;
+ size_t ucmdlen;
int page_shift;
int npages;
int ncont;
int err;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ ucmdlen = (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+            sizeof(ucmd)) ?
+           sizeof(ucmd) - sizeof(ucmd.reserved) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
+ if (ucmdlen == sizeof(ucmd) && ucmd.reserved != 0)
+ return -EINVAL;
+
if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
return -EINVAL;
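/*
 * Compat-rule sketch (hypothetical sizes standing in for the v1/v2 ABI
 * structs, where v2 only appends a "reserved" tail): an old userspace sends
 * the shorter struct and the kernel copies only that much; a new userspace
 * must zero the tail or get -EINVAL.
 */
#include <stdio.h>

int main(void)
{
	unsigned long full = 24, tail = 4;	/* v2 size / reserved tail */
	unsigned long provided = 20;		/* an old caller's udata payload */
	unsigned long ucmdlen = provided < full ? full - tail : full;

	printf("copy %lu bytes\n", ucmdlen);	/* 20: reserved tail skipped */
	return 0;
}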
@@ -556,7 +651,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
goto err_db;
}
mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
- (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = to_mucontext(context)->uuari.uars[0].index;
@@ -576,16 +671,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
+static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
- for (i = 0; i < nent; i++) {
- cqe = get_cqe(cq, i);
- cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
- cqe64->op_own = 0xf1;
+ for (i = 0; i < buf->nent; i++) {
+ cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+ cqe64->op_own = MLX5_CQE_INVALID << 4;
}
}
@@ -610,7 +705,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (err)
goto err_db;
- init_cq_buf(cq, entries);
+ init_cq_buf(cq, &cq->buf);
*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
@@ -620,7 +715,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = dev->mdev.priv.uuari.uars[0].index;
return 0;
@@ -653,8 +748,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
int eqn;
int err;
+ if (entries < 0)
+ return ERR_PTR(-EINVAL);
+
entries = roundup_pow_of_two(entries + 1);
- if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+ if (entries > dev->mdev.caps.max_cqes)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +845,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
return 0;
}
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
- u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
- u32 lrsn;
-
- if (srq)
- lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
- else
- lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
- return rsn == lrsn;
+ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +877,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- if (is_equal_rsn(cqe64, srq, rsn)) {
- if (srq)
+ if (is_equal_rsn(cqe64, rsn)) {
+ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
++nfreed;
} else if (nfreed) {
@@ -823,12 +913,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- return -ENOSYS;
+ struct mlx5_modify_cq_mbox_in *in;
+ struct mlx5_ib_dev *dev = to_mdev(cq->device);
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int err;
+ u32 fsel;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+ return -ENOSYS;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->cqn = cpu_to_be32(mcq->mcq.cqn);
+ fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+ in->ctx.cq_period = cpu_to_be16(cq_period);
+ in->ctx.cq_max_count = cpu_to_be16(cq_count);
+ in->field_select = cpu_to_be32(fsel);
+ err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+ kfree(in);
+
+ if (err)
+ mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
+
+ return err;
+}
+
+static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int entries, struct ib_udata *udata, int *npas,
+ int *page_shift, int *cqe_size)
+{
+ struct mlx5_ib_resize_cq ucmd;
+ struct ib_umem *umem;
+ int err;
+ int npages;
+ struct ib_ucontext *context = cq->buf.umem->context;
+
+ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ if (err)
+ return err;
+
+ if (ucmd.reserved0 || ucmd.reserved1)
+ return -EINVAL;
+
+ umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem)) {
+ err = PTR_ERR(umem);
+ return err;
+ }
+
+ mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+ npas, NULL);
+
+ cq->resize_umem = umem;
+ *cqe_size = ucmd.cqe_size;
+
+ return 0;
+}
+
+static void un_resize_user(struct mlx5_ib_cq *cq)
+{
+ ib_umem_release(cq->resize_umem);
+}
+
+static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int entries, int cqe_size)
+{
+ int err;
+
+ cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
+ if (!cq->resize_buf)
+ return -ENOMEM;
+
+ err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ if (err)
+ goto ex;
+
+ init_cq_buf(cq, cq->resize_buf);
+
+ return 0;
+
+ex:
+ kfree(cq->resize_buf);
+ return err;
+}
+
+static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+}
+
+static int copy_resize_cqes(struct mlx5_ib_cq *cq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_cqe64 *scqe64;
+ struct mlx5_cqe64 *dcqe64;
+ void *start_cqe;
+ void *scqe;
+ void *dcqe;
+ int ssize;
+ int dsize;
+ int i;
+ u8 sw_own;
+
+ ssize = cq->buf.cqe_size;
+ dsize = cq->resize_buf->cqe_size;
+ if (ssize != dsize) {
+ mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
+ return -EINVAL;
+ }
+
+ i = cq->mcq.cons_index;
+ scqe = get_sw_cqe(cq, i);
+ scqe64 = ssize == 64 ? scqe : scqe + 64;
+ start_cqe = scqe;
+ if (!scqe) {
+ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+ return -EINVAL;
+ }
+
+ while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+ dcqe = get_cqe_from_buf(cq->resize_buf,
+ (i + 1) & (cq->resize_buf->nent),
+ dsize);
+ dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
+ sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
+ memcpy(dcqe, scqe, dsize);
+ dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
+
+ ++i;
+ scqe = get_sw_cqe(cq, i);
+ scqe64 = ssize == 64 ? scqe : scqe + 64;
+ if (!scqe) {
+ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+ return -EINVAL;
+ }
+
+ if (scqe == start_cqe) {
+ pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
+ cq->mcq.cqn);
+ return -ENOMEM;
+ }
+ }
+ ++cq->mcq.cons_index;
+ return 0;
}
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
- return -ENOSYS;
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ struct mlx5_modify_cq_mbox_in *in;
+ int err;
+ int npas;
+ int page_shift;
+ int inlen;
+ int uninitialized_var(cqe_size);
+ unsigned long flags;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+ pr_info("Firmware does not support resize CQ\n");
+ return -ENOSYS;
+ }
+
+ if (entries < 1)
+ return -EINVAL;
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries > dev->mdev.caps.max_cqes + 1)
+ return -EINVAL;
+
+ if (entries == ibcq->cqe + 1)
+ return 0;
+
+ mutex_lock(&cq->resize_mutex);
+ if (udata) {
+ err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
+ &cqe_size);
+ } else {
+ cqe_size = 64;
+ err = resize_kernel(dev, cq, entries, cqe_size);
+ if (!err) {
+ npas = cq->resize_buf->buf.npages;
+ page_shift = cq->resize_buf->buf.page_shift;
+ }
+ }
+
+ if (err)
+ goto ex;
+
+ inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto ex_resize;
+ }
+
+ if (udata)
+ mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
+ in->pas, 0);
+ else
+ mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
+
+ in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
+ MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+ MLX5_MODIFY_CQ_MASK_PG_SIZE);
+ in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+ in->ctx.page_offset = 0;
+ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
+ in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
+ in->cqn = cpu_to_be32(cq->mcq.cqn);
+
+ err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
+ if (err)
+ goto ex_alloc;
+
+ if (udata) {
+ cq->ibcq.cqe = entries - 1;
+ ib_umem_release(cq->buf.umem);
+ cq->buf.umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ } else {
+ struct mlx5_ib_cq_buf tbuf;
+ int resized = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ if (cq->resize_buf) {
+ err = copy_resize_cqes(cq);
+ if (!err) {
+ tbuf = cq->buf;
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ resized = 1;
+ }
+ }
+ cq->ibcq.cqe = entries - 1;
+ spin_unlock_irqrestore(&cq->lock, flags);
+ if (resized)
+ free_cq_buf(dev, &tbuf);
+ }
+ mutex_unlock(&cq->resize_mutex);
+
+ mlx5_vfree(in);
+ return 0;
+
+ex_alloc:
+ mlx5_vfree(in);
+
+ex_resize:
+ if (udata)
+ un_resize_user(cq);
+ else
+ un_resize_kernel(dev, cq);
+ex:
+ mutex_unlock(&cq->resize_mutex);
+ return err;
}
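/*
 * Kernel-consumer sketch (illustration only; grow_cq is a hypothetical
 * helper): a ULP can now grow its CQ with the standard verb; for kernel
 * CQs, outstanding CQEs are migrated to the new buffer under cq->lock by
 * copy_resize_cqes() above.
 */
#include <rdma/ib_verbs.h>

static int grow_cq(struct ib_cq *cq, int new_cqe)
{
	int err = ib_resize_cq(cq, new_cqe);

	if (err)
		pr_warn("CQ resize to %d entries failed: %d\n", new_cqe, err);
	return err;
}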
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 256a23344f2..ece028fc47d 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3f831de9a4d..364d4b6937f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -46,8 +46,8 @@
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "1.0"
-#define DRIVER_RELDATE "June 2013"
+#define DRIVER_VERSION "2.2-1"
+#define DRIVER_RELDATE "Feb 2014"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
@@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
{
struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+ char name[MLX5_MAX_EQ_NAME];
struct mlx5_eq *eq, *n;
int ncomp_vec;
int nent;
@@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
goto clean;
}
- snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+ snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
err = mlx5_create_map_eq(&dev->mdev, eq,
i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- eq->name,
- &dev->mdev.priv.uuari.uars[0]);
+ name, &dev->mdev.priv.uuari.uars[0]);
if (err) {
kfree(eq);
goto clean;
@@ -261,8 +261,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_RC_RNR_NAK_GEN;
flags = dev->mdev.caps.flags;
if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
@@ -274,6 +273,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (flags & MLX5_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
+ props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ /* At this stage no support for signature handover */
+ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
+ IB_PROT_T10DIF_TYPE_2 |
+ IB_PROT_T10DIF_TYPE_3;
+ props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
+ IB_GUARD_T10DIF_CSUM;
+ }
+ if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)
+ props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
0xffffff;
@@ -301,9 +311,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
- props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
- IB_ATOMIC_HCA : IB_ATOMIC_NONE;
- props->masked_atomic_cap = IB_ATOMIC_HCA;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
@@ -537,34 +546,51 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req req;
+ struct mlx5_ib_alloc_ucontext_req_v2 req;
struct mlx5_ib_alloc_ucontext_resp resp;
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
+ int gross_uuars;
int num_uars;
+ int ver;
int uuarn;
int err;
int i;
+ int reqlen;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- err = ib_copy_from_udata(&req, udata, sizeof(req));
+ memset(&req, 0, sizeof(req));
+ reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
+ if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ ver = 0;
+ else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ ver = 2;
+ else
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&req, udata, reqlen);
if (err)
return ERR_PTR(err);
+ if (req.flags || req.reserved)
+ return ERR_PTR(-EINVAL);
+
if (req.total_num_uuars > MLX5_MAX_UUARS)
return ERR_PTR(-ENOMEM);
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
- req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
+ req.total_num_uuars = ALIGN(req.total_num_uuars,
+ MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
return ERR_PTR(-EINVAL);
- num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
+ num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
+ gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
resp.cache_line_size = L1_CACHE_BYTES;
@@ -586,7 +612,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_ctx;
}
- uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
+ uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
sizeof(*uuari->bitmap),
GFP_KERNEL);
if (!uuari->bitmap) {
@@ -596,13 +622,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
/*
* clear all fast path uuars
*/
- for (i = 0; i < req.total_num_uuars; i++) {
+ for (i = 0; i < gross_uuars; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
set_bit(i, uuari->bitmap);
}
- uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
+ uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
if (!uuari->count) {
err = -ENOMEM;
goto out_bitmap;
@@ -624,6 +650,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (err)
goto out_uars;
+ uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
@@ -746,7 +773,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+ NULL, NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
goto err_in;
@@ -1006,6 +1034,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
+ if (port < 1 || port > ibdev->num_ports) {
+ mlx5_ib_warn(ibdev, "event on unknown port %d\n", port);
+ return;
+ }
+
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
}
@@ -1401,12 +1434,15 @@ static int init_one(struct pci_dev *pdev,
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
+ dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
+ dev->ib_dev.create_mr = mlx5_ib_create_mr;
dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+ dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 3a5322870b9..8499aec94db 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -44,16 +44,17 @@
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int *ncont, int *order)
{
- struct ib_umem_chunk *chunk;
unsigned long tmp;
unsigned long m;
- int i, j, k;
+ int i, k;
u64 base = 0;
int p = 0;
int skip;
int mask;
u64 len;
u64 pfn;
+ struct scatterlist *sg;
+ int entry;
addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
@@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
skip = 1 << m;
mask = skip - 1;
i = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; j++) {
- len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
- pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- tmp = (unsigned long)pfn;
- m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ tmp = (unsigned long)pfn;
+ m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+ skip = 1 << m;
+ mask = skip - 1;
+ base = pfn;
+ p = 0;
+ } else {
+ if (base + p != pfn) {
+ tmp = (unsigned long)p;
+ m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
- } else {
- if (base + p != pfn) {
- tmp = (unsigned long)p;
- m = find_first_bit(&tmp, sizeof(tmp));
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- }
}
- p++;
- i++;
}
+ p++;
+ i++;
}
+ }
if (i) {
m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
@@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
{
int shift = page_shift - PAGE_SHIFT;
int mask = (1 << shift) - 1;
- struct ib_umem_chunk *chunk;
- int i, j, k;
+ int i, k;
u64 cur = 0;
u64 base;
int len;
+ struct scatterlist *sg;
+ int entry;
i = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; j++) {
- len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
- base = sg_dma_address(&chunk->page_list[j]);
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
- if (umr)
- cur |= 3;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ base = sg_dma_address(sg);
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ cur = base + (k << PAGE_SHIFT);
+ if (umr)
+ cur |= 3;
- pas[i >> shift] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[i >> shift]));
- } else
- mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << PAGE_SHIFT));
- i++;
- }
+ pas[i >> shift] = cpu_to_be64(cur);
+ mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+ i >> shift, be64_to_cpu(pas[i >> shift]));
+ } else
+ mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+ base + (k << PAGE_SHIFT));
+ i++;
}
+ }
}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be915724..f2ccf1a5a29 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -189,12 +189,16 @@ struct mlx5_ib_qp {
int create_type;
u32 pa_lkey;
+
+ /* Store signature errors */
+ bool signature_en;
};
struct mlx5_ib_cq_buf {
struct mlx5_buf buf;
struct ib_umem *umem;
int cqe_size;
+ int nent;
};
enum mlx5_ib_qp_flags {
@@ -220,7 +224,7 @@ struct mlx5_ib_cq {
/* protect resize cq
*/
struct mutex resize_mutex;
- struct mlx5_ib_cq_resize *resize_buf;
+ struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
};
@@ -260,8 +264,9 @@ struct mlx5_ib_mr {
__be64 *pas;
dma_addr_t dma;
int npages;
- struct completion done;
- enum ib_wc_status status;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_create_mkey_mbox_out out;
+ struct mlx5_core_sig_ctx *sig;
};
struct mlx5_ib_fast_reg_page_list {
@@ -270,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list {
dma_addr_t map;
};
+struct mlx5_ib_umr_context {
+ enum ib_wc_status status;
+ struct completion done;
+};
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+ context->status = -1;
+ init_completion(&context->done);
+}
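/*
 * Usage sketch mirroring reg_umr()/unreg_umr() in mr.c (post_umr_and_wait
 * is a hypothetical helper, not new API): the poster hangs a stack context
 * off wr_id and waits on its own completion, so concurrent UMR posts no
 * longer funnel through a single per-MR completion.
 */
static int post_umr_and_wait(struct ib_qp *qp, struct ib_send_wr *wr)
{
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr *bad;
	int err;

	mlx5_ib_init_umr_context(&umr_context);
	wr->wr_id = (u64)(unsigned long)&umr_context;
	err = ib_post_send(qp, wr, &bad);
	if (err)
		return err;
	wait_for_completion(&umr_context.done);	/* mlx5_umr_cq_handler() completes it */
	return umr_context.status == IB_WC_SUCCESS ? 0 : -EFAULT;
}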
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
@@ -323,6 +339,7 @@ struct mlx5_cache_ent {
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
+ int pending;
};
struct mlx5_mr_cache {
@@ -358,6 +375,8 @@ struct mlx5_ib_dev {
spinlock_t mr_lock;
struct mlx5_ib_resources devr;
struct mlx5_mr_cache cache;
+ struct timer_list delay_timer;
+ int fill_delay;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -390,6 +409,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
return container_of(mqp, struct mlx5_ib_qp, mqp);
}
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+ return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -489,6 +513,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
@@ -524,6 +551,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+ struct ib_mr_status *mr_status);
static inline void init_query_mad(struct ib_smp *mad)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index bd41df95b6f..afa873bd028 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,16 @@
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
+#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
enum {
- DEF_CACHE_SIZE = 10,
+ MAX_PENDING_REG_MR = 8,
+};
+
+enum {
+ MLX5_UMR_ALIGN = 2048
};
static __be64 *mr_align(__be64 *ptr, int align)
@@ -59,15 +64,67 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
return order - cache->ent[0].order;
}
+static void reg_mr_callback(int status, void *context)
+{
+ struct mlx5_ib_mr *mr = context;
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct mlx5_mr_cache *cache = &dev->cache;
+ int c = order2idx(dev, mr->order);
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ u8 key;
+ unsigned long flags;
+ struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+ int err;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ ent->pending--;
+ spin_unlock_irqrestore(&ent->lock, flags);
+ if (status) {
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
+ if (mr->out.hdr.status) {
+ mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+ mr->out.hdr.status,
+ be32_to_cpu(mr->out.hdr.syndrome));
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
+ spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+ key = dev->mdev.priv.mkey_key++;
+ spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+ mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+ cache->last_add = jiffies;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ list_add_tail(&mr->list, &ent->head);
+ ent->cur++;
+ ent->size++;
+ spin_unlock_irqrestore(&ent->lock, flags);
+
+ write_lock_irqsave(&table->lock, flags);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+ &mr->mmr);
+ if (err)
+ pr_err("Error inserting to mr tree. 0x%x\n", -err);
+ write_unlock_irqrestore(&table->lock, flags);
+}
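/*
 * Key-layout illustration (assumes mlx5_idx_to_mkey(i) == i << 8 and
 * mlx5_base_mkey(k) == k & 0xffffff00, as in mlx5 core): firmware hands
 * back a 24-bit index; the driver appends a rotating 8-bit variant byte so
 * a recycled cache MR gets a fresh lkey/rkey while still mapping to the
 * same radix-tree slot. Values are hypothetical.
 */
#include <stdio.h>

int main(void)
{
	unsigned int idx = 0xabcdef;		/* FW mkey index */
	unsigned char key = 0x42;		/* rotating variant byte */
	unsigned int mkey = (idx << 8) | key;

	printf("mkey 0x%08x base 0x%08x\n", mkey, mkey & 0xffffff00);
	return 0;				/* 0xabcdef42 / 0xabcdef00 */
}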
+
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
- int size = sizeof(u64) * npages;
int err = 0;
int i;
@@ -76,87 +133,66 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
return -ENOMEM;
for (i = 0; i < num; i++) {
+ if (ent->pending >= MAX_PENDING_REG_MR) {
+ err = -EAGAIN;
+ break;
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
err = -ENOMEM;
- goto out;
+ break;
}
mr->order = ent->order;
mr->umred = 1;
- mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
- if (!mr->pas) {
- kfree(mr);
- err = -ENOMEM;
- goto out;
- }
- mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, mr->dma)) {
- kfree(mr->pas);
- kfree(mr);
- err = -ENOMEM;
- goto out;
- }
-
+ mr->dev = dev;
in->seg.status = 1 << 6;
in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
in->seg.log2_page_size = 12;
+ spin_lock_irq(&ent->lock);
+ ent->pending++;
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
- sizeof(*in));
+ sizeof(*in), reg_mr_callback,
+ mr, &mr->out);
if (err) {
mlx5_ib_warn(dev, "create mkey failed %d\n", err);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
kfree(mr);
- goto out;
+ break;
}
- cache->last_add = jiffies;
-
- spin_lock(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- ent->size++;
- spin_unlock(&ent->lock);
}
-out:
kfree(in);
return err;
}
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *mr;
- int size;
int err;
int i;
for (i = 0; i < num; i++) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
- } else {
- size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
+ else
kfree(mr);
- }
}
}
@@ -183,9 +219,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
return -EINVAL;
if (var > ent->size) {
- err = add_keys(dev, c, var - ent->size);
- if (err)
- return err;
+ do {
+ err = add_keys(dev, c, var - ent->size);
+ if (err && err != -EAGAIN)
+ return err;
+
+ usleep_range(3000, 5000);
+ } while (err);
} else if (var < ent->size) {
remove_keys(dev, c, ent->size - var);
}
@@ -301,23 +341,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
struct mlx5_ib_dev *dev = ent->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int i = order2idx(dev, ent->order);
+ int err;
if (cache->stopped)
return;
ent = &dev->cache.ent[i];
- if (ent->cur < 2 * ent->limit) {
- add_keys(dev, i, 1);
- if (ent->cur < 2 * ent->limit)
- queue_work(cache->wq, &ent->work);
+ if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+ err = add_keys(dev, i, 1);
+ if (ent->cur < 2 * ent->limit) {
+ if (err == -EAGAIN) {
+ mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+ i + 2);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(3));
+ } else if (err) {
+ mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+ i + 2, err);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(1000));
+ } else {
+ queue_work(cache->wq, &ent->work);
+ }
+ }
} else if (ent->cur > 2 * ent->limit) {
if (!someone_adding(cache) &&
- time_after(jiffies, cache->last_add + 60 * HZ)) {
+ time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
- queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
}
}
}
@@ -357,18 +411,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (!list_empty(&ent->head)) {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->cur--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
break;
}
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
queue_work(cache->wq, &ent->work);
@@ -395,12 +449,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return;
}
ent = &cache->ent[c];
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
ent->cur++;
if (ent->cur > 2 * ent->limit)
shrink = 1;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (shrink)
queue_work(cache->wq, &ent->work);
@@ -408,33 +462,28 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *mr;
- int size;
int err;
+ cancel_delayed_work(&ent->dwork);
while (1) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
- } else {
- size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
+ else
kfree(mr);
- }
}
}
@@ -490,12 +539,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
debugfs_remove_recursive(dev->cache.root);
}
+static void delay_time_func(unsigned long ctx)
+{
+ struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+ dev->fill_delay = 0;
+}
+
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int limit;
- int size;
int err;
int i;
@@ -505,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
return -ENOMEM;
}
+ setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
INIT_LIST_HEAD(&cache->ent[i].head);
spin_lock_init(&cache->ent[i].lock);
@@ -515,13 +571,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->order = i + 2;
ent->dev = dev;
- if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
- size = dev->mdev.profile->mr_cache[i].size;
+ if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
limit = dev->mdev.profile->mr_cache[i].limit;
- } else {
- size = DEF_CACHE_SIZE;
+ else
limit = 0;
- }
+
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
ent->limit = limit;
@@ -540,13 +594,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
int i;
dev->cache.stopped = 1;
- destroy_workqueue(dev->cache.wq);
+ flush_workqueue(dev->cache.wq);
mlx5_mr_cache_debugfs_cleanup(dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
clean_keys(dev, i);
+ destroy_workqueue(dev->cache.wq);
+ del_timer_sync(&dev->delay_timer);
+
return 0;
}
@@ -575,7 +632,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+ NULL);
if (err)
goto err_in;
@@ -650,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
- struct mlx5_ib_mr *mr;
+ struct mlx5_ib_umr_context *context;
struct ib_wc wc;
int err;
@@ -663,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
if (err == 0)
break;
- mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
- mr->status = wc.status;
- complete(&mr->done);
+ context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+ context->status = wc.status;
+ complete(&context->done);
}
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
@@ -675,21 +733,24 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
- int err;
+ int size = sizeof(u64) * npages;
+ int err = 0;
int i;
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < 1; i++) {
mr = alloc_cached_mr(dev, order);
if (mr)
break;
err = add_keys(dev, order2idx(dev, order), 1);
- if (err) {
- mlx5_ib_warn(dev, "add_keys failed\n");
+ if (err && err != -EAGAIN) {
+ mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
break;
}
}
@@ -697,38 +758,58 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
+ mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+ if (!mr->pas) {
+ err = -ENOMEM;
+ goto free_mr;
+ }
+
+ mlx5_ib_populate_pas(dev, umem, page_shift,
+ mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+
+ mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->dma)) {
+ err = -ENOMEM;
+ goto free_pas;
+ }
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
- /* We serialize polls so one process does not kidnap another's
- * completion. This is not a problem since wr is completed in
- * around 1 usec
- */
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- up(&umrc->sem);
- goto error;
+ goto unmap_dma;
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed\n");
+ err = -EFAULT;
+ }
}
- wait_for_completion(&mr->done);
+
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = len;
+ mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
up(&umrc->sem);
+ dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- if (mr->status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- goto error;
+free_pas:
+ kfree(mr->pas);
+
+free_mr:
+ if (err) {
+ free_cached_mr(dev, mr);
+ return ERR_PTR(err);
}
return mr;
-
-error:
- free_cached_mr(dev, mr);
- return ERR_PTR(err);
}
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
@@ -763,8 +844,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
in->seg.log2_page_size = page_shift;
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+ in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+ 1 << page_shift));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+ NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
@@ -855,24 +938,26 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
int err;
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
up(&umrc->sem);
mlx5_ib_dbg(dev, "err %d\n", err);
goto error;
+ } else {
+ wait_for_completion(&umr_context.done);
+ up(&umrc->sem);
}
- wait_for_completion(&mr->done);
- up(&umrc->sem);
- if (mr->status != IB_WC_SUCCESS) {
+ if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "unreg umr failed\n");
err = -EFAULT;
goto error;
@@ -921,6 +1006,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int access_mode, err;
+ int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ in->seg.status = 1 << 6; /* free */
+ in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ access_mode = MLX5_ACCESS_MODE_MTT;
+
+ if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
+ u32 psv_index[2];
+
+ in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
+ MLX5_MKEY_BSF_EN);
+ in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig) {
+ err = -ENOMEM;
+ goto err_free_in;
+ }
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
+ 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ }
+
+ in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
+ NULL, NULL, NULL);
+ if (err)
+ goto err_destroy_psv;
+
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+ mr->umem = NULL;
+ kfree(in);
+
+ return &mr->ibmr;
+
+err_destroy_psv:
+ if (mr->sig) {
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+ }
+err_free_sig:
+ kfree(mr->sig);
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ int err;
+
+ if (mr->sig) {
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+ kfree(mr->sig);
+ }
+
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+ mr->mmr.key, err);
+ return err;
+ }
+
+ kfree(mr);
+
+ return err;
+}
+
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
@@ -948,7 +1149,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
* TBD not needed - issue 197292 */
in->seg.log2_page_size = PAGE_SHIFT;
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+ NULL, NULL);
kfree(in);
if (err)
goto err_free;
@@ -1005,3 +1207,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
}
+
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+ struct ib_mr_status *mr_status)
+{
+ struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+ int ret = 0;
+
+ if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+ pr_err("Invalid status check mask\n");
+ ret = -EINVAL;
+ goto done;
+ }
+
+ mr_status->fail_status = 0;
+ if (check_mask & IB_MR_CHECK_SIG_STATUS) {
+ if (!mmr->sig) {
+ ret = -EINVAL;
+ pr_err("signature status check requested on a non-signature enabled MR\n");
+ goto done;
+ }
+
+ mmr->sig->sig_status_checked = true;
+ if (!mmr->sig->sig_err_exists)
+ goto done;
+
+ if (ibmr->lkey == mmr->sig->err_item.key)
+ memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+ sizeof(mr_status->sig_err));
+ else {
+ mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+ mr_status->sig_err.sig_err_offset = 0;
+ mr_status->sig_err.key = mmr->sig->err_item.key;
+ }
+
+ mmr->sig->sig_err_exists = false;
+ mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+ }
+
+done:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 045f8cdbd30..bbbcf389272 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type)
switch (qp_type) {
case IB_QPT_XRC_INI:
- size = sizeof(struct mlx5_wqe_xrc_seg);
+ size += sizeof(struct mlx5_wqe_xrc_seg);
/* fall through */
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -211,20 +211,25 @@ static int sq_overhead(enum ib_qp_type qp_type)
sizeof(struct mlx5_wqe_raddr_seg);
break;
+ case IB_QPT_XRC_TGT:
+ return 0;
+
case IB_QPT_UC:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg);
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg) +
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg);
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_datagram_seg);
break;
case MLX5_IB_QPT_REG_UMR:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
sizeof(struct mlx5_mkey_seg);
break;
@@ -251,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
- return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
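+ /* signature-enabled QPs post extra UMR/SET_PSV segments, so the
+  * larger signature WQE size is enforced as a minimum */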
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
+ return MLX5_SIG_WQE_SIZE;
+ else
+ return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
@@ -270,7 +278,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
return wqe_size;
if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
- mlx5_ib_dbg(dev, "\n");
+ mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
+ wqe_size, dev->mdev.caps.max_sq_desc_sz);
return -EINVAL;
}
@@ -278,11 +287,20 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+ qp->signature_en = true;
+
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
+ if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+ mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+ qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+ return -ENOMEM;
+ }
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.max_gs = attr->cap.max_send_sge;
- qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
+ qp->sq.max_post = wq_size / wqe_size;
+ attr->cap.max_send_wr = qp->sq.max_post;
return wq_size;
}
@@ -330,14 +348,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
+static int first_med_uuar(void)
+{
+ return 1;
+}
+
+static int next_uuar(int n)
+{
+ n++;
+
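+ /*
+  * Each UAR page holds four blue-flame registers; this iterator
+  * appears to skip slots 2 and 3 of every group of four, handing out
+  * only the first two registers per page.
+  */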
+ while (((n % 4) & 2))
+ n++;
+
+ return n;
+}
+
+static int num_med_uuar(struct mlx5_uuar_info *uuari)
+{
+ int n;
+
+ n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
+ uuari->num_low_latency_uuars - 1;
+
+ return n >= 0 ? n : 0;
+}
+
+static int max_uuari(struct mlx5_uuar_info *uuari)
+{
+ return uuari->num_uars * 4;
+}
+
+static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+{
+ int med;
+ int i;
+ int t;
+
+ med = num_med_uuar(uuari);
+ for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
+ t++;
+ if (t == med)
+ return next_uuar(i);
+ }
+
+ return 0;
+}
+
static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int start_uuar;
int i;
- start_uuar = nuuars - uuari->num_low_latency_uuars;
- for (i = start_uuar; i < nuuars; i++) {
+ for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
if (!test_bit(i, uuari->bitmap)) {
set_bit(i, uuari->bitmap);
uuari->count[i]++;
@@ -350,19 +411,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int minidx = 1;
- int uuarn;
- int end;
+ int minidx = first_med_uuar();
int i;
- end = nuuars - uuari->num_low_latency_uuars;
-
- for (i = 1; i < end; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- continue;
-
+ for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
if (uuari->count[i] < uuari->count[minidx])
minidx = i;
}
@@ -384,11 +436,17 @@ static int alloc_uuar(struct mlx5_uuar_info *uuari,
break;
case MLX5_IB_LATENCY_CLASS_MEDIUM:
- uuarn = alloc_med_class_uuar(uuari);
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_med_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_HIGH:
- uuarn = alloc_high_class_uuar(uuari);
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_high_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_FAST_PATH:
@@ -479,12 +537,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
- int page_shift;
+ int page_shift = 0;
int uar_index;
int npages;
- u32 offset;
+ u32 offset = 0;
int uuarn;
- int ncont;
+ int ncont = 0;
int err;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -500,38 +558,53 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ qp->rq.offset = 0;
+ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
err = set_user_buf_size(dev, qp, &ucmd);
if (err)
goto err_uuar;
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
- goto err_uuar;
+ if (ucmd.buf_addr && qp->buf_size) {
+ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ qp->buf_size, 0, 0);
+ if (IS_ERR(qp->umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ err = PTR_ERR(qp->umem);
+ goto err_uuar;
+ }
+ } else {
+ qp->umem = NULL;
}
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
+ if (qp->umem) {
+ mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+ ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
}
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
*in = mlx5_vzalloc(*inlen);
@@ -539,9 +612,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (qp->umem)
+ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
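+ /* the log page size programmed below is relative to the device's 4K
+  * base page (MLX5_ADAPTER_PAGE_SHIFT), not the kernel's PAGE_SHIFT */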
(*in)->ctx.log_pg_sz_remote_qpn =
- cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -570,7 +644,8 @@ err_free:
mlx5_vfree(*in);
err_umem:
- ib_umem_release(qp->umem);
+ if (qp->umem)
+ ib_umem_release(qp->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
@@ -583,7 +658,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- ib_umem_release(qp->umem);
+ if (qp->umem)
+ ib_umem_release(qp->umem);
free_uuar(&context->uuari, qp->uuarn);
}
@@ -599,8 +675,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
int err;
uuari = &dev->mdev.priv.uuari;
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
- qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+ if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
@@ -638,7 +714,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_buf;
}
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
- (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
/* Set "fast registration enabled" for all kernel QPs */
(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -734,6 +811,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+ mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
+ return -EINVAL;
+ } else {
+ qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+ }
+ }
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -805,6 +891,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->wq_sig)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+ if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1280,6 +1369,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX,
},
},
[MLX5_QP_STATE_RTR] = {
@@ -1302,9 +1396,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RNR_TIMEOUT |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_SRQN |
MLX5_QP_OPTPAR_CQN_RCV,
@@ -1314,6 +1410,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
},
},
};
@@ -1530,7 +1631,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
mlx5_st = to_mlx5_st(ibqp->qp_type);
- if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
+ if (mlx5_st < 0)
goto out;
optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1593,7 +1694,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
- !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@@ -1651,29 +1753,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
-{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
- } else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = 0;
- }
-}
-
-static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
- struct ib_send_wr *wr)
-{
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
-}
-
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
@@ -1714,6 +1793,27 @@ static __be64 frwr_mkey_mask(void)
return cpu_to_be64(result);
}
+static __be64 sig_mkey_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_SIGERR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE |
+ MLX5_MKEY_MASK_BSF_EN;
+
+ return cpu_to_be64(result);
+}
+
static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr, int li)
{
@@ -1747,6 +1847,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
MLX5_MKEY_MASK_PD |
MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
MLX5_MKEY_MASK_A |
@@ -1768,7 +1869,7 @@ static u8 get_umr_flags(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+ MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
@@ -1780,7 +1881,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
return;
}
- seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
+ seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
+ MLX5_ACCESS_MODE_MTT;
*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
@@ -1803,7 +1905,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
seg->len = cpu_to_be64(wr->wr.fast_reg.length);
seg->log2_page_size = wr->wr.fast_reg.page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+ mlx5_mkey_variant(wr->wr.fast_reg.rkey));
}
static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1895,6 +1998,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
return 0;
}
+static u16 prot_field_size(enum ib_signature_type type)
+{
+ switch (type) {
+ case IB_SIG_TYPE_T10_DIF:
+ return MLX5_DIF_SIZE;
+ default:
+ return 0;
+ }
+}
+
+static u8 bs_selector(int block_size)
+{
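+ /* map the protection-interval block size to the device's selector
+  * encoding; 1073741824 is a 1 GB block, unsupported sizes map to 0 */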
+ switch (block_size) {
+ case 512: return 0x1;
+ case 520: return 0x2;
+ case 4096: return 0x3;
+ case 4160: return 0x4;
+ case 1073741824: return 0x5;
+ default: return 0;
+ }
+}
+
+static int format_selector(struct ib_sig_attrs *attr,
+ struct ib_sig_domain *domain,
+ int *selector)
+{
+
+#define FORMAT_DIF_NONE 0
+#define FORMAT_DIF_CRC_INC 8
+#define FORMAT_DIF_CRC_NO_INC 12
+#define FORMAT_DIF_CSUM_INC 13
+#define FORMAT_DIF_CSUM_NO_INC 14
+
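+ /* The selector encodes the DIF block-guard type (CRC vs. IP
+  * checksum) and whether the reference tag auto-increments; Type 3
+  * increments only when type3_inc_reftag is set. */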
+ switch (domain->sig.dif.type) {
+ case IB_T10DIF_NONE:
+ /* No DIF */
+ *selector = FORMAT_DIF_NONE;
+ break;
+ case IB_T10DIF_TYPE1: /* Fall through */
+ case IB_T10DIF_TYPE2:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = FORMAT_DIF_CRC_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = FORMAT_DIF_CSUM_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ case IB_T10DIF_TYPE3:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CRC_INC :
+ FORMAT_DIF_CRC_NO_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CSUM_INC :
+ FORMAT_DIF_CSUM_NO_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_bsf *bsf, u32 data_size)
+{
+ struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+ struct mlx5_bsf_basic *basic = &bsf->basic;
+ struct ib_sig_domain *mem = &sig_attrs->mem;
+ struct ib_sig_domain *wire = &sig_attrs->wire;
+ int ret, selector;
+
+ memset(bsf, 0, sizeof(*bsf));
+ switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
+ return -EINVAL;
+
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+ mem->sig.dif.type == wire->sig.dif.type) {
+ /* Same block structure */
+ basic->bsf_size_sbs = 1 << 4;
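+ /*
+  * The 8-byte DIF tuple is guard(2), app-tag(2), ref-tag(4); each
+  * copy_byte_mask bit appears to cover one byte, hence the
+  * 0xc0/0x30/0x0f values below.
+  */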
+ if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+ basic->wire.copy_byte_mask |= 0xc0;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= 0x30;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= 0x0f;
+ } else
+ basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
+
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ ret = format_selector(sig_attrs, mem, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->m_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_memory.psv_idx);
+
+ ret = format_selector(sig_attrs, wire, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->w_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_wire.psv_idx);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct mlx5_bsf *bsf;
+ u32 data_len = wr->sg_list->length;
+ u32 data_key = wr->sg_list->lkey;
+ u64 data_va = wr->sg_list->addr;
+ int ret;
+ int wqe_size;
+
+ if (!wr->wr.sig_handover.prot ||
+ (data_key == wr->wr.sig_handover.prot->lkey &&
+ data_va == wr->wr.sig_handover.prot->addr &&
+ data_len == wr->wr.sig_handover.prot->length)) {
+ /**
+ * Source domain doesn't contain signature information
+ * or data and protection are interleaved in memory.
+ * So we need to construct:
+ * ------------------
+ * | data_klm |
+ * ------------------
+ * | BSF |
+ * ------------------
+ **/
+ struct mlx5_klm *data_klm = *seg;
+
+ data_klm->bcount = cpu_to_be32(data_len);
+ data_klm->key = cpu_to_be32(data_key);
+ data_klm->va = cpu_to_be64(data_va);
+ wqe_size = ALIGN(sizeof(*data_klm), 64);
+ } else {
+ /**
+ * Source domain contains signature information,
+ * so we need to construct a strided block format:
+ * ---------------------------
+ * | stride_block_ctrl |
+ * ---------------------------
+ * | data_klm |
+ * ---------------------------
+ * | prot_klm |
+ * ---------------------------
+ * | BSF |
+ * ---------------------------
+ **/
+ struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+ struct mlx5_stride_block_entry *data_sentry;
+ struct mlx5_stride_block_entry *prot_sentry;
+ u32 prot_key = wr->wr.sig_handover.prot->lkey;
+ u64 prot_va = wr->wr.sig_handover.prot->addr;
+ u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+ int prot_size;
+
+ sblock_ctrl = *seg;
+ data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+ prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+ prot_size = prot_field_size(sig_attrs->mem.sig_type);
+ if (!prot_size) {
+ pr_err("Bad block size given: %u\n", block_size);
+ return -EINVAL;
+ }
+ sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+ prot_size);
+ sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+ sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+ sblock_ctrl->num_entries = cpu_to_be16(2);
+
+ data_sentry->bcount = cpu_to_be16(block_size);
+ data_sentry->key = cpu_to_be32(data_key);
+ data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
+ prot_sentry->bcount = cpu_to_be16(prot_size);
+ prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
+
+ wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+ sizeof(*prot_sentry), 64);
+ }
+
+ *seg += wqe_size;
+ *size += wqe_size / 16;
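+ /* the send queue is a ring; wrap back to WQE 0 when we hit the end */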
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ bsf = *seg;
+ ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+ if (ret)
+ return -EINVAL;
+
+ *seg += sizeof(*bsf);
+ *size += sizeof(*bsf) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ return 0;
+}
+
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+ struct ib_send_wr *wr, u32 nelements,
+ u32 length, u32 pdn)
+{
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ u32 sig_key = sig_mr->rkey;
+ u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+ MLX5_ACCESS_MODE_KLM;
+ seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+ MLX5_MKEY_BSF_EN | pdn);
+ seg->len = cpu_to_be64(length);
+ seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+ seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, u32 nelements)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+ umr->klm_octowords = get_klm_octo(nelements);
+ umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+ umr->mkey_mask = sig_mkey_mask();
+}
+
+
+static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ u32 pdn = get_pd(qp)->pdn;
+ u32 klm_oct_size;
+ int region_len, ret;
+
+ if (unlikely(wr->num_sge != 1) ||
+ unlikely(wr->wr.sig_handover.access_flags &
+ IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ unlikely(!sig_mr->sig->sig_status_checked))
+ return -EINVAL;
+
+ /* length of the protected region, data + protection */
+ region_len = wr->sg_list->length;
+ if (wr->wr.sig_handover.prot &&
+ (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
+ wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
+ wr->wr.sig_handover.prot->length != wr->sg_list->length))
+ region_len += wr->wr.sig_handover.prot->length;
+
+ /**
+ * KLM octoword size - if protection was provided
+ * then we use strided block format (3 octowords),
+ * else we use a single KLM (1 octoword)
+ **/
+ klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+
+ set_sig_umr_segment(*seg, wr, klm_oct_size);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ ret = set_sig_data_segment(wr, qp, seg, size);
+ if (ret)
+ return ret;
+
+ sig_mr->sig->sig_status_checked = false;
+ return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+ u32 psv_idx, void **seg, int *size)
+{
+ struct mlx5_seg_set_psv *psv_seg = *seg;
+
+ memset(psv_seg, 0, sizeof(*psv_seg));
+ psv_seg->psv_num = cpu_to_be32(psv_idx);
+ switch (domain->sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+ domain->sig.dif.app_tag);
+ psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+ break;
+
+ default:
+ pr_err("Bad signature type given.\n");
+ return 1;
+ }
+
+ return 0;
+}
+
static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
{
@@ -1916,6 +2355,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
if (!li) {
+ if (unlikely(wr->wr.fast_reg.page_list_len >
+ wr->wr.fast_reg.page_list->max_page_list_len))
+ return -ENOMEM;
+
set_frwr_pages(*seg, wr, mdev, pd, writ);
*seg += sizeof(struct mlx5_wqe_data_seg);
*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
@@ -1978,6 +2421,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
}
}
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ struct ib_send_wr *wr, int *idx,
+ int *size, int nreq)
+{
+ int err = 0;
+
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ *seg = mlx5_get_send_wqe(qp, *idx);
+ *ctrl = *seg;
+ *(uint32_t *)(*seg + 8) = 0;
+ (*ctrl)->imm = send_ieth(wr);
+ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+ (wr->send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ MLX5_WQE_CTRL_SOLICITED : 0);
+
+ *seg += sizeof(**ctrl);
+ *size = sizeof(**ctrl) / 16;
+
+ return err;
+}
+
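+/*
+ * Complete a WQE: fill in the control segment and record the
+ * bookkeeping needed at completion time.  Factoring begin_wqe() and
+ * finish_wqe() out of the post-send loop lets IB_WR_REG_SIG_MR post
+ * three WQEs (one UMR and two SET_PSV) for a single work request.
+ */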
+static void finish_wqe(struct mlx5_ib_qp *qp,
+ struct mlx5_wqe_ctrl_seg *ctrl,
+ u8 size, unsigned idx, u64 wr_id,
+ int nreq, u8 fence, u8 next_fence,
+ u32 mlx5_opcode)
+{
+ u8 opmod = 0;
+
+ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+ mlx5_opcode | ((u32)opmod << 24));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->fm_ce_se |= fence;
+ qp->fm_cache = next_fence;
+ if (unlikely(qp->wq_sig))
+ ctrl->signature = wq_sig(ctrl);
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = mlx5_opcode;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+}
+
+
int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1985,13 +2481,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = &dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf = qp->bf;
int uninitialized_var(size);
void *qend = qp->sq.qend;
unsigned long flags;
- u32 mlx5_opcode;
unsigned idx;
int err = 0;
int inl = 0;
@@ -2000,7 +2496,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
u8 next_fence = 0;
- u8 opmod = 0;
u8 fence;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -2013,36 +2508,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+ fence = qp->fm_cache;
+ num_sge = wr->num_sge;
+ if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
- fence = qp->fm_cache;
- num_sge = wr->num_sge;
- if (unlikely(num_sge > qp->sq.max_gs)) {
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
- idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- seg = mlx5_get_send_wqe(qp, idx);
- ctrl = seg;
- *(uint32_t *)(seg + 8) = 0;
- ctrl->imm = send_ieth(wr);
- ctrl->fm_ce_se = qp->sq_signal_bits |
- (wr->send_flags & IB_SEND_SIGNALED ?
- MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- MLX5_WQE_CTRL_SOLICITED : 0);
-
- seg += sizeof(*ctrl);
- size = sizeof(*ctrl) / 16;
-
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
@@ -2063,28 +2545,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(seg, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
-
- set_atomic_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_atomic_seg);
-
- size += (sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_atomic_seg)) / 16;
- break;
-
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- set_raddr_seg(seg, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
-
- set_masked_atomic_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_masked_atomic_seg);
-
- size += (sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
- break;
+ mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
case IB_WR_LOCAL_INV:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
@@ -2112,6 +2577,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = 0;
break;
+ case IB_WR_REG_SIG_MR:
+ qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
+ mr = to_mmr(wr->wr.sig_handover.sig_mr);
+
+ ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
+ err = set_sig_umr_wr(wr, qp, &seg, &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_UMR);
+ /*
+ * SET_PSV WQEs are not signaled and are
+ * solicited only on error
+ */
+ wr->send_flags &= ~IB_SEND_SIGNALED;
+ wr->send_flags |= IB_SEND_SOLICITED;
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ num_sge = 0;
+ goto skip_psv;
+
default:
break;
}
@@ -2192,22 +2724,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
}
- mlx5_opcode = mlx5_ib_opcode[wr->opcode];
- ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
- mlx5_opcode |
- ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
- ctrl->fm_ce_se |= get_fence(fence, wr);
- qp->fm_cache = next_fence;
- if (unlikely(qp->wq_sig))
- ctrl->signature = wq_sig(ctrl);
-
- qp->sq.wrid[idx] = wr->wr_id;
- qp->sq.w_list[idx].opcode = mlx5_opcode;
- qp->sq.wqe_head[idx] = qp->sq.head + nreq;
- qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
-
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ get_fence(fence, wr), next_fence,
+ mlx5_ib_opcode[wr->opcode]);
+skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
@@ -2223,6 +2743,10 @@ out:
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+ /* Make sure doorbell record is visible to the HCA before
+ * we hit doorbell */
+ wmb();
+
if (bf->need_lock)
spin_lock(&bf->lock);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 84d297afd6a..384af6dec5e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd;
+ size_t ucmdlen;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
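+ /* older userspace may pass a command without the trailing reserved
+  * field; accept either size and copy only what was provided */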
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
+
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -123,7 +135,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_in;
}
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
return 0;
@@ -192,7 +204,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
return 0;
@@ -295,7 +307,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
mlx5_vfree(in);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
- goto err_srq;
+ goto err_usr_kern_srq;
}
mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
@@ -316,6 +328,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
err_core:
mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+
+err_usr_kern_srq:
if (pd->uobject)
destroy_srq_user(pd, srq);
else
@@ -388,9 +402,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
- kfree(msrq->wrid);
- mlx5_buf_free(&dev->mdev, &msrq->buf);
- mlx5_db_free(&dev->mdev, &msrq->db);
+ destroy_srq_kernel(dev, msrq);
}
kfree(srq);
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index a886de3e593..d0ba264ac1e 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -62,6 +62,13 @@ struct mlx5_ib_alloc_ucontext_req {
__u32 num_low_latency_uuars;
};
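+/*
+ * Version 2 of the ucontext request adds a flags word; the trailing
+ * reserved field pads the structure to a multiple of 8 bytes so the
+ * layout is identical for 32- and 64-bit userspace.
+ */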
+struct mlx5_ib_alloc_ucontext_req_v2 {
+ __u32 total_num_uuars;
+ __u32 num_low_latency_uuars;
+ __u32 flags;
+ __u32 reserved;
+};
+
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
@@ -84,6 +91,7 @@ struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u32 cqe_size;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_cq_resp {
@@ -93,12 +101,16 @@ struct mlx5_ib_create_cq_resp {
struct mlx5_ib_resize_cq {
__u64 buf_addr;
+ __u16 cqe_size;
+ __u16 reserved0;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_srq_resp {
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 7c9d35f39d7..69020173899 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -357,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
eqe->type, eqe->subtype, eq->eqn);
break;
- };
+ }
set_eqe_hw(eqe);
++eq->cons_index;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 87897b95666..ded76c101dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -858,13 +858,9 @@ static int mthca_enable_msi_x(struct mthca_dev *mdev)
entries[1].entry = 1;
entries[2].entry = 2;
- err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err) {
- if (err > 0)
- mthca_info(mdev, "Only %d MSI-X vectors available, "
- "not using MSI-X\n", err);
+ err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err)
return err;
- }
mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 5b71d43bd89..415f8e1a54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
+ err = -EFAULT;
goto err_free;
}
@@ -976,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
int write_mtt_size;
@@ -1009,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = mr->umem->nmap;
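+ /* ib_umem now exposes one scatterlist (sg_head/nmap) instead of a
+  * chunk list, so the mapped entry count is available directly */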
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
@@ -1030,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- mr->umem->page_size * k;
- /*
- * Be friendly to write_mtt and pass it chunks
- * of appropriate size.
- */
- if (i == write_mtt_size) {
- err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
- if (err)
- goto mtt_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 26a68453610..e354b2f04ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -860,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 429141078ee..3b2a6dc8ea9 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION);
int max_mtu = 9000;
int interrupt_mod_interval = 0;
-
/* Interoperability */
int mpa_version = 1;
module_param(mpa_version, int, 0644);
@@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = {
MODULE_DEVICE_TABLE(pci, nes_pci_table);
+/* registered nes netlink callbacks */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
static int nes_net_event(struct notifier_block *, unsigned long, void *);
static int nes_notifiers_registered;
@@ -672,11 +681,25 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
}
nes_notifiers_registered++;
+ if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+ printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
+ __func__, __LINE__);
+
+ ret = iwpm_init(RDMA_NL_NES);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
+ pci_name(pcidev));
+ goto bail7;
+ }
+
INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
/* Initialize network devices */
- if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL)
+ netdev = nes_netdev_init(nesdev, mmio_regs);
+ if (netdev == NULL) {
+ ret = -ENOMEM;
goto bail7;
+ }
/* Register network device */
ret = register_netdev(netdev);
@@ -707,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+ ibnl_remove_client(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
@@ -770,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev)
nesdev->nesadapter->netdev_count--;
}
}
+ ibnl_remove_client(RDMA_NL_NES);
+ iwpm_exit(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 33cc58941a3..bd9d132f11c 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -51,6 +51,8 @@
#include <rdma/ib_pack.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
#define NES_SEND_FIRST_WRITE
@@ -130,6 +132,7 @@
#define NES_DBG_IW_TX 0x00040000
#define NES_DBG_SHUTDOWN 0x00080000
#define NES_DBG_PAU 0x00100000
+#define NES_DBG_NLMSG 0x00200000
#define NES_DBG_RSVD1 0x10000000
#define NES_DBG_RSVD2 0x20000000
#define NES_DBG_RSVD3 0x40000000
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6b29249aa85..6f09a72e78d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -59,6 +59,7 @@
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/tcp.h>
+#include <linux/fcntl.h>
#include "nes.h"
@@ -128,6 +129,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8);
static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
static void print_core(struct nes_cm_core *core);
+static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
@@ -165,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
{
return rem_ref_cm_node(cm_node->cm_core, cm_node);
}
-
/**
* create_event
*/
@@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
}
}
-
if (priv_data_len + mpa_hdr_len != len) {
nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
" complete (%x + %x != %x)\n",
@@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
/* send reset */
return -EINVAL;
}
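+ /* IETF_NO_IRD_ORD in either field means the peer left its IRD/ORD
+  * unspecified, so the negotiation below is skipped */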
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
/* responder */
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
- } else {
- /* initiator */
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
-
- if (cm_node->ird_size < ord_size) {
- /* no resources available */
- /* send terminate message */
- return -EINVAL;
+ if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ /* we are still negotiating */
+ if (ord_size > NES_MAX_IRD) {
+ cm_node->ird_size = NES_MAX_IRD;
+ } else {
+ cm_node->ird_size = ord_size;
+ if (ord_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ cm_node->ird_size = 1;
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
+ __func__, ord_size);
+ }
+ }
+ if (ird_size > NES_MAX_ORD)
+ cm_node->ord_size = NES_MAX_ORD;
+ else
+ cm_node->ord_size = ird_size;
+ } else { /* initiator */
+ if (ord_size > NES_MAX_IRD) {
+ nes_debug(NES_DBG_CM,
+ "%s: Unable to support the requested (ord =%u)\n",
+ __func__, ord_size);
+ return -EINVAL;
+ }
+ cm_node->ird_size = ord_size;
+
+ if (ird_size > NES_MAX_ORD) {
+ cm_node->ord_size = NES_MAX_ORD;
+ } else {
+ if (ird_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
+ __func__, ird_size);
+ return -EINVAL;
+ } else {
+ cm_node->ord_size = ird_size;
+ }
+ }
}
}
if (rtr_ctrl_ord & IETF_RDMA0_READ) {
cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+
} else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
} else { /* Not supported RDMA0 operation */
@@ -450,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
iph->ttl = 0x40;
iph->protocol = 0x06; /* IPPROTO_TCP */
- iph->saddr = htonl(cm_node->loc_addr);
- iph->daddr = htonl(cm_node->rem_addr);
+ iph->saddr = htonl(cm_node->mapped_loc_addr);
+ iph->daddr = htonl(cm_node->mapped_rem_addr);
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
+ tcph->source = htons(cm_node->mapped_loc_port);
+ tcph->dest = htons(cm_node->mapped_rem_port);
tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
if (flags & SET_ACK) {
@@ -493,6 +525,100 @@ static void form_cm_frame(struct sk_buff *skb,
cm_packets_created++;
}
+/*
+ * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
+ */
+static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
+ struct sockaddr_storage *addr)
+{
+ struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
+ nes_sockaddr->sin_family = AF_INET;
+ memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32));
+ nes_sockaddr->sin_port = port;
+}
+
+/*
+ * nes_create_mapinfo - Create a mapinfo object in the port mapper database
+ */
+static int nes_create_mapinfo(struct nes_cm_info *cm_info)
+{
+ struct sockaddr_storage local_sockaddr;
+ struct sockaddr_storage mapped_sockaddr;
+
+ nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
+ &local_sockaddr);
+ nes_create_sockaddr(htonl(cm_info->mapped_loc_addr),
+ htons(cm_info->mapped_loc_port), &mapped_sockaddr);
+
+ return iwpm_create_mapinfo(&local_sockaddr,
+ &mapped_sockaddr, RDMA_NL_NES);
+}
+
+/*
+ * nes_remove_mapinfo - Remove a mapinfo object from the port mapper database
+ *                      and send a remove-mapping message to the
+ *                      userspace port mapper
+ */
+static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
+ u32 mapped_loc_addr, u16 mapped_loc_port)
+{
+ struct sockaddr_storage local_sockaddr;
+ struct sockaddr_storage mapped_sockaddr;
+
+ nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr);
+ nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
+ &mapped_sockaddr);
+
+ iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr);
+ return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES);
+}
+
+/*
+ * nes_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void nes_form_pm_msg(struct nes_cm_info *cm_info,
+ struct iwpm_sa_data *pm_msg)
+{
+ nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
+ &pm_msg->loc_addr);
+ nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port),
+ &pm_msg->rem_addr);
+}
+
+/*
+ * nes_form_reg_msg - Form a port mapper message with dev info
+ */
+static void nes_form_reg_msg(struct nes_vnic *nesvnic,
+ struct iwpm_dev_data *pm_msg)
+{
+ memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name,
+ IWPM_DEVNAME_SIZE);
+ memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
+}
+
+/*
+ * nes_record_pm_msg - Save the received mapping info
+ */
+static void nes_record_pm_msg(struct nes_cm_info *cm_info,
+ struct iwpm_sa_data *pm_msg)
+{
+ struct sockaddr_in *mapped_loc_addr =
+ (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
+ struct sockaddr_in *mapped_rem_addr =
+ (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+
+ if (mapped_loc_addr->sin_family == AF_INET) {
+ cm_info->mapped_loc_addr =
+ ntohl(mapped_loc_addr->sin_addr.s_addr);
+ cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port);
+ }
+ if (mapped_rem_addr->sin_family == AF_INET) {
+ cm_info->mapped_rem_addr =
+ ntohl(mapped_rem_addr->sin_addr.s_addr);
+ cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port);
+ }
+}
+
/**
* print_core - dump a cm core
*/
@@ -514,6 +640,19 @@ static void print_core(struct nes_cm_core *core)
nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
}
+static void record_ird_ord(struct nes_cm_node *cm_node,
+ u16 conn_ird, u16 conn_ord)
+{
+ if (conn_ird > NES_MAX_IRD)
+ conn_ird = NES_MAX_IRD;
+
+ if (conn_ord > NES_MAX_ORD)
+ conn_ord = NES_MAX_ORD;
+
+ cm_node->ird_size = conn_ird;
+ cm_node->ord_size = conn_ord;
+}
+
/**
* cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
*/
@@ -557,11 +696,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
/* initialize RTR msg */
- ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ird_size;
- ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ord_size;
-
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
+ ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
+ }
ctrl_ird |= IETF_PEER_TO_PEER;
ctrl_ird |= IETF_FLPDU_ZERO_LEN;
@@ -610,7 +751,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
struct nes_qp *nesqp = *nesqp_addr;
struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
- u64temp = (unsigned long)nesqp;
+ u64temp = (unsigned long)nesqp->nesuqp_addr;
u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
@@ -1100,8 +1241,11 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
loc_addr, loc_port,
cm_node->rem_addr, cm_node->rem_port,
rem_addr, rem_port);
- if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
- (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+ if ((cm_node->mapped_loc_addr == loc_addr) &&
+ (cm_node->mapped_loc_port == loc_port) &&
+ (cm_node->mapped_rem_addr == rem_addr) &&
+ (cm_node->mapped_rem_port == rem_port)) {
+
add_ref_cm_node(cm_node);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
return cm_node;
@@ -1118,18 +1262,28 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
* find_listener - find a cm node listening on this addr-port pair
*/
static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
- nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+ nes_addr_t dst_addr, u16 dst_port,
+ enum nes_cm_listener_state listener_state, int local)
{
unsigned long flags;
struct nes_cm_listener *listen_node;
+ nes_addr_t listen_addr;
+ u16 listen_port;
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+ if (local) {
+ listen_addr = listen_node->loc_addr;
+ listen_port = listen_node->loc_port;
+ } else {
+ listen_addr = listen_node->mapped_loc_addr;
+ listen_port = listen_node->mapped_loc_port;
+ }
/* compare node pair, return node handle if a match */
- if (((listen_node->loc_addr == dst_addr) ||
- listen_node->loc_addr == 0x00000000) &&
- (listen_node->loc_port == dst_port) &&
+ if (((listen_addr == dst_addr) ||
+ listen_addr == 0x00000000) &&
+ (listen_port == dst_port) &&
(listener_state & listen_node->listener_state)) {
atomic_inc(&listen_node->ref_count);
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
@@ -1142,7 +1296,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
return NULL;
}
-
/**
* add_hte_node - add a cm node to the hash table
*/
@@ -1263,9 +1416,20 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- if (listener->nesvnic)
- nes_manage_apbvt(listener->nesvnic, listener->loc_port,
- PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+ if (listener->nesvnic) {
+ nes_manage_apbvt(listener->nesvnic,
+ listener->mapped_loc_port,
+ PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
+ nes_remove_mapinfo(listener->loc_addr,
+ listener->loc_port,
+ listener->mapped_loc_addr,
+ listener->mapped_loc_port);
+ nes_debug(NES_DBG_NLMSG,
+ "Delete APBVT mapped_loc_port = %04X\n",
+ listener->mapped_loc_port);
+ }
nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
@@ -1354,8 +1518,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
neigh->ha, ntohl(rt->rt_gateway));
if (arpindex >= 0) {
- if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
- neigh->ha, ETH_ALEN)) {
+ if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
/* Mac address same as in nes_arp_table */
goto out;
}
@@ -1408,10 +1571,16 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loc_port = cm_info->loc_port;
cm_node->rem_port = cm_info->rem_port;
+ cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
+ cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
+ cm_node->mapped_loc_port = cm_info->mapped_loc_port;
+ cm_node->mapped_rem_port = cm_info->mapped_rem_port;
+
cm_node->mpa_frame_rev = mpa_version;
cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->ird_size = IETF_NO_IRD_ORD;
- cm_node->ord_size = IETF_NO_IRD_ORD;
+ cm_node->mpav2_ird_ord = 0;
+ cm_node->ird_size = 0;
+ cm_node->ord_size = 0;
nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
&cm_node->loc_addr, cm_node->loc_port,
@@ -1453,8 +1622,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loopbackpartner = NULL;
/* get the mac addr for the remote node */
- oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
- arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+ oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
+ NULL, NES_ARP_RESOLVE);
+ arpindex = nes_addr_resolve_neigh(nesvnic,
+ cm_node->mapped_rem_addr, oldarpindex);
if (arpindex < 0) {
kfree(cm_node);
return NULL;
@@ -1516,11 +1687,14 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
} else {
if (cm_node->apbvt_set && cm_node->nesvnic) {
- nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(
- cm_node->nesvnic->nesdev->pcidev->devfn),
+ nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+ PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
NES_MANAGE_APBVT_DEL);
}
+ nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
+ cm_node->mapped_loc_port);
+ nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
+ cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
}
atomic_dec(&cm_core->node_cnt);
@@ -2188,17 +2362,21 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
* mini_cm_listen - create a listen node with params
*/
static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
{
struct nes_cm_listener *listener;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
unsigned long flags;
+ int iwpm_err = 0;
nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
cm_info->loc_addr, cm_info->loc_port);
/* cannot have multiple matching listeners */
- listener = find_listener(cm_core, htonl(cm_info->loc_addr),
- htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+ listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
+ NES_CM_LISTENER_EITHER_STATE, 1);
+
if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
/* find_listener() already incremented the ref count on a match */
atomic_dec(&listener->ref_count);
@@ -2207,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
}
if (!listener) {
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(cm_info, &pm_msg);
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(cm_info, &pm_msg);
+ }
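+ /* A port-mapper failure is not fatal: mapped_loc_* keep the
+ * unmapped defaults set by the caller and we listen on those.
+ */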
+
/* create a CM listen node (1/2 node to compare incoming traffic to) */
listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
if (!listener) {
@@ -2214,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
return NULL;
}
- listener->loc_addr = htonl(cm_info->loc_addr);
- listener->loc_port = htons(cm_info->loc_port);
+ listener->loc_addr = cm_info->loc_addr;
+ listener->loc_port = cm_info->loc_port;
+ listener->mapped_loc_addr = cm_info->mapped_loc_addr;
+ listener->mapped_loc_port = cm_info->mapped_loc_port;
listener->reused_node = 0;
atomic_set(&listener->ref_count, 1);
@@ -2277,14 +2473,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
if (cm_info->loc_addr == cm_info->rem_addr) {
loopbackremotelistener = find_listener(cm_core,
- ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
- NES_CM_LISTENER_ACTIVE_STATE);
+ cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
if (loopbackremotelistener == NULL) {
create_event(cm_node, NES_CM_EVENT_ABORTED);
} else {
loopback_cm_info = *cm_info;
loopback_cm_info.loc_port = cm_info->rem_port;
loopback_cm_info.rem_port = cm_info->loc_port;
+ loopback_cm_info.mapped_loc_port =
+ cm_info->mapped_rem_port;
+ loopback_cm_info.mapped_rem_port =
+ cm_info->mapped_loc_port;
loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
loopbackremotenode = make_cm_node(cm_core, nesvnic,
&loopback_cm_info, loopbackremotelistener);
@@ -2513,6 +2713,12 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
nfo.rem_addr = ntohl(iph->saddr);
nfo.rem_port = ntohs(tcph->source);
+ /* If port mapper is available these should be mapped address info */
+ nfo.mapped_loc_addr = ntohl(iph->daddr);
+ nfo.mapped_loc_port = ntohs(tcph->dest);
+ nfo.mapped_rem_addr = ntohl(iph->saddr);
+ nfo.mapped_rem_port = ntohs(tcph->source);
+
tmp_daddr = cpu_to_be32(iph->daddr);
tmp_saddr = cpu_to_be32(iph->saddr);
@@ -2521,8 +2727,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
do {
cm_node = find_node(cm_core,
- nfo.rem_port, nfo.rem_addr,
- nfo.loc_port, nfo.loc_addr);
+ nfo.mapped_rem_port, nfo.mapped_rem_addr,
+ nfo.mapped_loc_port, nfo.mapped_loc_addr);
if (!cm_node) {
/* The only packets accepted here are for */
@@ -2531,9 +2737,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
skb_handled = 0;
break;
}
- listener = find_listener(cm_core, nfo.loc_addr,
- nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
+ listener = find_listener(cm_core, nfo.mapped_loc_addr,
+ nfo.mapped_loc_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
if (!listener) {
nfo.cm_id = NULL;
nfo.conn_type = 0;
@@ -3028,11 +3234,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
rem_ref_cm_node(cm_node->cm_core, cm_node);
return -ECONNRESET;
}
-
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
+
nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
atomic_inc(&cm_accepts);
@@ -3055,6 +3261,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (cm_node->mpa_frame_rev == IETF_MPA_V1)
mpa_frame_offset = 4;
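+ /* Fall back to the ULP-supplied IRD/ORD when MPA v1 is in use or
+ * the peer's MPA v2 frame did not carry them.
+ */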
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
+ cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ }
+
memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
conn_param->private_data_len);
@@ -3118,7 +3329,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
nesqp->skip_lsmm = 1;
-
/* Cache the cm_id in the qp */
nesqp->cm_id = cm_id;
cm_node->cm_id = cm_id;
@@ -3133,10 +3343,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nes_cm_init_tsa_conn(nesqp, cm_node);
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3155,14 +3367,14 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)conn_param->ord);
+ cpu_to_le32((u32)cm_node->ord_size);
memset(&nes_quad, 0, sizeof(nes_quad));
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
- nes_quad.TcpPorts[0] = raddr->sin_port;
- nes_quad.TcpPorts[1] = laddr->sin_port;
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3195,6 +3407,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
+ cm_event.ird = cm_node->ird_size;
+ cm_event.ord = cm_node->ord_size;
+
ret = cm_id->event_handler(cm_id, &cm_event);
attr.qp_state = IB_QPS_RTS;
nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
@@ -3261,6 +3476,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int apbvt_set = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
if (cm_id->remote_addr.ss_family != AF_INET)
return -ENOSYS;
@@ -3291,33 +3509,51 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
/* cache the cm_id in the qp */
nesqp->cm_id = cm_id;
-
cm_id->provider_data = nesqp;
-
nesqp->private_data_len = conn_param->private_data_len;
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
- /* space for rdma0 read msg */
- if (conn_param->ord == 0)
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
conn_param->private_data_len);
+ /* set up the connection params for the node */
+ cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.cm_id = cm_id;
+ cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+ /* No port mapper available, go with the specified peer information */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
+ cm_info.mapped_rem_addr = cm_info.rem_addr;
+ cm_info.mapped_rem_port = cm_info.rem_port;
+
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(&cm_info, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(&cm_info, &pm_msg);
+ }
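+ /* As on the listen side, port-mapper errors are only logged; the
+ * mapped_* fields keep the defaults above and the connect proceeds
+ * with the unmapped 4-tuple.
+ */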
+
if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
- nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+ PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
apbvt_set = 1;
}
- /* set up the connection params for the node */
- cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
- cm_info.loc_port = htons(laddr->sin_port);
- cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
- cm_info.rem_port = htons(raddr->sin_port);
- cm_info.cm_id = cm_id;
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
cm_id->add_ref(cm_id);
@@ -3327,14 +3563,23 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
&cm_info);
if (!cm_node) {
if (apbvt_set)
- nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
PCI_FUNC(nesdev->pcidev->devfn),
NES_MANAGE_APBVT_DEL);
+ nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
+ cm_info.mapped_loc_port);
+ nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
+ cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
cm_id->rem_ref(cm_id);
return -ENOMEM;
}
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
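+ /* leave room for the zero-length RDMA READ used as the rdma0 op */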
+ if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+ cm_node->ord_size == 0)
+ cm_node->ord_size = 1;
+
cm_node->apbvt_set = apbvt_set;
nesqp->cm_node = cm_node;
cm_node->nesqp = nesqp;
@@ -3371,13 +3616,16 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
/* setup listen params in our api call struct */
- cm_info.loc_addr = nesvnic->local_ipaddr;
- cm_info.loc_port = laddr->sin_port;
+ cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
cm_info.backlog = backlog;
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ /* No port mapper available, go with the specified info */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
if (!cm_node) {
@@ -3389,7 +3637,10 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_node;
if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
+
+ err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
NES_MANAGE_APBVT_ADD);
if (err) {
@@ -3514,9 +3765,11 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_cm_init_tsa_conn(nesqp, cm_node);
/* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3531,6 +3784,8 @@ static void cm_event_connected(struct nes_cm_event *event)
nesqp->nesqp_context->ird_ord_sizes |=
cpu_to_le32((u32)1 <<
NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
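+ /* program the negotiated ORD size into the QP context */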
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)cm_node->ord_size);
/* Adjust tail for not having a LSMM */
/*nesqp->hwqp.sq_tail = 1;*/
@@ -3544,9 +3799,9 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
- nes_quad.TcpPorts[0] = raddr->sin_port;
- nes_quad.TcpPorts[1] = laddr->sin_port;
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3574,7 +3829,7 @@ static void cm_event_connected(struct nes_cm_event *event)
cm_event.ird = cm_node->ird_size;
cm_event.ord = cm_node->ord_size;
- cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -3743,8 +3998,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
cm_event.private_data = cm_node->mpa_frame_buf;
cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
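+ /* MPA v1 carries no IRD/ORD, so report the adapter maximums */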
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
+ cm_event.ird = NES_MAX_IRD;
+ cm_event.ord = NES_MAX_ORD;
+ } else {
cm_event.ird = cm_node->ird_size;
cm_event.ord = cm_node->ord_size;
+ }
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 4646e666608..f522cf63978 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -58,6 +58,8 @@
#define IETF_RDMA0_WRITE 0x8000
#define IETF_RDMA0_READ 0x4000
#define IETF_NO_IRD_ORD 0x3FFF
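+/* adapter limits reported when the MPA exchange yields no IRD/ORD */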
+#define NES_MAX_IRD 0x40
+#define NES_MAX_ORD 0x7F
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -291,8 +293,8 @@ struct nes_cm_listener {
struct list_head list;
struct nes_cm_core *cm_core;
u8 loc_mac[ETH_ALEN];
- nes_addr_t loc_addr;
- u16 loc_port;
+ nes_addr_t loc_addr, mapped_loc_addr;
+ u16 loc_port, mapped_loc_port;
struct iw_cm_id *cm_id;
enum nes_cm_conn_type conn_type;
atomic_t ref_count;
@@ -306,7 +308,9 @@ struct nes_cm_listener {
/* per connection node and node state information */
struct nes_cm_node {
nes_addr_t loc_addr, rem_addr;
+ nes_addr_t mapped_loc_addr, mapped_rem_addr;
u16 loc_port, rem_port;
+ u16 mapped_loc_port, mapped_rem_port;
u8 loc_mac[ETH_ALEN];
u8 rem_mac[ETH_ALEN];
@@ -333,6 +337,7 @@ struct nes_cm_node {
enum mpa_frame_version mpa_frame_rev;
u16 ird_size;
u16 ord_size;
+ u16 mpav2_ird_ord;
u16 mpa_frame_size;
struct iw_cm_id *cm_id;
@@ -361,6 +366,10 @@ struct nes_cm_info {
u16 rem_port;
nes_addr_t loc_addr;
nes_addr_t rem_addr;
+ u16 mapped_loc_port;
+ u16 mapped_rem_port;
+ nes_addr_t mapped_loc_addr;
+ nes_addr_t mapped_rem_addr;
enum nes_cm_conn_type conn_type;
int backlog;
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index 4926de74448..529c421bb15 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -39,8 +39,8 @@
#include <linux/types.h>
-#define NES_ABI_USERSPACE_VER 1
-#define NES_ABI_KERNEL_VER 1
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER 2
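+/* ABI v2 adds user_qp_buffer to struct nes_create_qp_req */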
/*
* Make sure that all structs defined in this file remain laid out so
@@ -78,6 +78,7 @@ struct nes_create_cq_req {
struct nes_create_qp_req {
__u64 user_wqe_buffers;
+ __u64 user_qp_buffer;
};
enum iwnes_memreg_type {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a228..218dd357428 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1186,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
kfree(nesqp->allocated_buffer);
nes_debug(NES_DBG_QP, "ib_copy_from_udata() failed\n");
- return NULL;
+ return ERR_PTR(-EFAULT);
}
if (req.user_wqe_buffers) {
virt_wqs = 1;
}
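+ /* ABI v2: remember the user-space QP buffer address, if supplied */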
+ if (req.user_qp_buffer)
+ nesqp->nesuqp_addr = req.user_qp_buffer;
if ((ibpd->uobject) && (ibpd->uobject->context)) {
nesqp->user_mode = 1;
nes_ucontext = to_nesucontext(ibpd->uobject->context);
@@ -2307,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ib_mr *ibmr = ERR_PTR(-EINVAL);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct nes_ucontext *nes_ucontext;
struct nes_pbl *nespbl;
struct nes_mr *nesmr;
@@ -2315,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_mem_reg_req req;
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
- int nmap_index, page_index;
+ int entry, page_index;
int page_count = 0;
int err, pbl_depth = 0;
int chunk_pages;
@@ -2330,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u16 pbl_count;
u8 single_page = 1;
u8 stag_key;
+ int first_page = 1;
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
@@ -2380,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nesmr->region = region;
- list_for_each_entry(chunk, &region->chunk_list, list) {
- nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
- chunk->nents, chunk->nmap);
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
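+ /* ib_umem now exposes a single scatterlist (sg_head) instead of
+ * a chunk list, so walk it with for_each_sg()
+ */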
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ if (sg_dma_address(sg) & ~PAGE_MASK) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) sg_dma_address(sg));
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- if (!sg_dma_len(&chunk->page_list[nmap_index])) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
+ if (!sg_dma_len(sg)) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- region_length += sg_dma_len(&chunk->page_list[nmap_index]);
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- region_length -= skip_pages << 12;
- for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
- skip_pages = 0;
- if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
- goto enough_pages;
- if ((page_count&0x01FF) == 0) {
- if (page_count >= 1024 * 512) {
+ region_length += sg_dma_len(sg);
+ chunk_pages = sg_dma_len(sg) >> 12;
+ region_length -= skip_pages << 12;
+ for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
+ skip_pages = 0;
+ if ((page_count != 0) &&
+ ((page_count << 12) - (region->offset & (4096 - 1))) >= region->length)
+ goto enough_pages;
+ if ((page_count & 0x01FF) == 0) {
+ if (page_count >= 1024 * 512) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter,
+ nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-E2BIG);
+ goto reg_user_mr_err;
+ }
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+ 8192, &root_vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+ root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+ if (!root_vpbl.pbl_vbase) {
ib_umem_release(region);
- nes_free_resource(nesadapter,
- nesadapter->allocated_mrs, stag_index);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
- ibmr = ERR_PTR(-E2BIG);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (root_pbl_index == 1) {
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 8192, &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
- GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.pbl_vbase[0].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
- vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
+ root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+ GFP_KERNEL);
+ if (!root_vpbl.leaf_vpbl) {
ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
+ root_vpbl.pbl_vbase[0].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[0].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[0] = vpbl;
}
- if (single_page) {
- if (page_count != 0) {
- if ((last_dma_addr+4096) !=
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))
- single_page = 0;
- last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- } else {
- first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- last_dma_addr = first_dma_addr;
- }
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+ vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
+ if (1 <= root_pbl_index) {
+ root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+ root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+ }
+ root_pbl_index++;
+ cur_pbl_index = 0;
+ }
+ if (single_page) {
+ if (page_count != 0) {
+ if ((last_dma_addr + 4096) !=
+ (sg_dma_address(sg) + (page_index * 4096)))
+ single_page = 0;
+ last_dma_addr = sg_dma_address(sg) +
+ (page_index * 4096);
+ } else {
+ first_dma_addr = sg_dma_address(sg) +
+ (page_index * 4096);
+ last_dma_addr = first_dma_addr;
}
-
- vpbl.pbl_vbase[cur_pbl_index].pa_low =
- cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- vpbl.pbl_vbase[cur_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096))) >> 32)));
- cur_pbl_index++;
- page_count++;
}
+
+ vpbl.pbl_vbase[cur_pbl_index].pa_low =
+ cpu_to_le32((u32)(sg_dma_address(sg) +
+ (page_index * 4096)));
+ vpbl.pbl_vbase[cur_pbl_index].pa_high =
+ cpu_to_le32((u32)(((u64)(sg_dma_address(sg) +
+ (page_index * 4096))) >> 32));
+ cur_pbl_index++;
+ page_count++;
}
}
+
enough_pages:
nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
" stag_key=0x%08x\n",
@@ -2613,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
(void *) nespbl->pbl_vbase, nespbl->user_base);
- list_for_each_entry(chunk, &region->chunk_list, list) {
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
- nespbl->page = sg_page(&chunk->page_list[0]);
- for (page_index=0; page_index<chunk_pages; page_index++) {
- ((__le32 *)pbl)[0] = cpu_to_le32((u32)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))>>32);
- nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
- (unsigned long long)*pbl,
- le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
- pbl++;
- }
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ chunk_pages = sg_dma_len(sg) >> 12;
+ chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
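+ /* latch the head page just once; the old chunk loop reset it on
+ * every iteration
+ */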
+ if (first_page) {
+ nespbl->page = sg_page(sg);
+ first_page = 0;
+ }
+
+ for (page_index = 0; page_index < chunk_pages; page_index++) {
+ ((__le32 *)pbl)[0] = cpu_to_le32((u32)
+ (sg_dma_address(sg) + (page_index * 4096)));
+ ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+ (sg_dma_address(sg) + (page_index * 4096))) >> 32);
+ nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+ (unsigned long long)*pbl,
+ le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+ pbl++;
}
}
+
if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
} else {
@@ -2834,7 +2837,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->qp_context = nesqp->ibqp.qp_context;
init_attr->send_cq = nesqp->ibqp.send_cq;
init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq;
+ init_attr->srq = nesqp->ibqp.srq;
init_attr->cap = attr->cap;
return 0;
@@ -3134,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- if ((!ret) ||
- ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
- (ret))) {
+ if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
if (dont_wait) {
if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 0eff7c44d76..309b31c31ae 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -184,5 +184,6 @@ struct nes_qp {
u8 pau_busy;
u8 pau_pending;
u8 pau_state;
+ __u64 nesuqp_addr;
};
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
index b5b6056c851..c0cddc0192d 100644
--- a/drivers/infiniband/hw/ocrdma/Kconfig
+++ b/drivers/infiniband/hw/ocrdma/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_OCRDMA
tristate "Emulex One Connect HCA support"
- depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
+ depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
select NET_VENDOR_EMULEX
select BE2NET
---help---
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
index 06a5bed12e4..d1bfd4f4cdd 100644
--- a/drivers/infiniband/hw/ocrdma/Makefile
+++ b/drivers/infiniband/hw/ocrdma/Makefile
@@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o
-ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
+ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f87..19011dbb930 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -35,17 +35,27 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+
+#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
+#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)"
+#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)"
+
+#define OC_SKH_DEVICE_PF 0x720
+#define OC_SKH_DEVICE_VF 0x728
#define OCRDMA_MAX_AH 512
#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
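+/* combine a low/high 32-bit pair into a single 64-bit value */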
+#define convert_to_64bit(lo, hi) ((u64)(hi) << 32 | (u64)(lo))
+
struct ocrdma_dev_attr {
u8 fw_ver[32];
u32 vendor_id;
@@ -65,6 +75,7 @@ struct ocrdma_dev_attr {
int max_mr;
u64 max_mr_size;
u32 max_num_mr_pbl;
+ int max_mw;
int max_fmr;
int max_map_per_fmr;
int max_pages_per_frmr;
@@ -83,6 +94,12 @@ struct ocrdma_dev_attr {
u8 num_ird_pages;
};
+struct ocrdma_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+};
+
struct ocrdma_pbl {
void *va;
dma_addr_t pa;
@@ -122,6 +139,52 @@ struct mqe_ctx {
bool cmd_done;
};
+struct ocrdma_hw_mr {
+ u32 lkey;
+ u8 fr_mr;
+ u8 remote_atomic;
+ u8 remote_rd;
+ u8 remote_wr;
+ u8 local_rd;
+ u8 local_wr;
+ u8 mw_bind;
+ u8 rsvd;
+ u64 len;
+ struct ocrdma_pbl *pbl_table;
+ u32 num_pbls;
+ u32 num_pbes;
+ u32 pbl_size;
+ u32 pbe_size;
+ u64 fbo;
+ u64 va;
+};
+
+struct ocrdma_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct ocrdma_hw_mr hwmr;
+};
+
+struct ocrdma_stats {
+ u8 type;
+ struct ocrdma_dev *dev;
+};
+
+struct stats_mem {
+ struct ocrdma_mqe mqe;
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+ char *debugfs_mem;
+};
+
+struct phy_info {
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u16 phy_type;
+ u16 interface_type;
+};
+
struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -165,12 +228,30 @@ struct ocrdma_dev {
struct mqe_ctx mqe_ctx;
struct be_dev_info nic_info;
+ struct phy_info phy;
+ char model_number[32];
+ u32 hba_port_num;
struct list_head entry;
struct rcu_head rcu;
int id;
u64 stag_arr[OCRDMA_MAX_STAG];
u16 pvid;
+ u32 asic_id;
+
+ ulong last_stats_time;
+ struct mutex stats_lock; /* serializes debugfs stats operations */
+ struct stats_mem stats_mem;
+ struct ocrdma_stats rsrc_stats;
+ struct ocrdma_stats rx_stats;
+ struct ocrdma_stats wqe_stats;
+ struct ocrdma_stats tx_stats;
+ struct ocrdma_stats db_err_stats;
+ struct ocrdma_stats tx_qp_err_stats;
+ struct ocrdma_stats rx_qp_err_stats;
+ struct ocrdma_stats tx_dbg_stats;
+ struct ocrdma_stats rx_dbg_stats;
+ struct dentry *dir;
};
struct ocrdma_cq {
@@ -183,8 +264,8 @@ struct ocrdma_cq {
*/
u32 max_hw_cqe;
bool phase_change;
- bool armed, solicited;
- bool arm_needed;
+ bool deferred_arm, deferred_sol;
+ bool first_arm;
spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
* to cq polling
@@ -197,6 +278,7 @@ struct ocrdma_cq {
struct ocrdma_ucontext *ucontext;
dma_addr_t pa;
u32 len;
+ u32 cqe_cnt;
/* head of all qp's sq and rq for which cqes need to be flushed
* by the software.
@@ -206,7 +288,6 @@ struct ocrdma_cq {
struct ocrdma_pd {
struct ib_pd ibpd;
- struct ocrdma_dev *dev;
struct ocrdma_ucontext *uctx;
u32 id;
int num_dpp_qp;
@@ -291,33 +372,6 @@ struct ocrdma_qp {
bool dpp_enabled;
u8 *ird_q_va;
bool signaled;
- u16 db_cache;
-};
-
-struct ocrdma_hw_mr {
- u32 lkey;
- u8 fr_mr;
- u8 remote_atomic;
- u8 remote_rd;
- u8 remote_wr;
- u8 local_rd;
- u8 local_wr;
- u8 mw_bind;
- u8 rsvd;
- u64 len;
- struct ocrdma_pbl *pbl_table;
- u32 num_pbls;
- u32 num_pbes;
- u32 pbl_size;
- u32 pbe_size;
- u64 fbo;
- u64 va;
-};
-
-struct ocrdma_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct ocrdma_hw_mr hwmr;
};
struct ocrdma_ucontext {
@@ -384,13 +438,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct ocrdma_srq, ibsrq);
}
-
-static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
-{
- return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
- qp->id < 128) ? 24 : 16);
-}
-
static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
{
int cqe_valid;
@@ -422,5 +469,53 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
}
+static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
+ struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+ struct in6_addr in6;
+
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6))
+ rdma_get_mcast_mac(&in6, mac_addr);
+ else
+ memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+ return 0;
+}
+
+static inline char *hca_name(struct ocrdma_dev *dev)
+{
+ switch (dev->nic_info.pdev->device) {
+ case OC_SKH_DEVICE_PF:
+ case OC_SKH_DEVICE_VF:
+ return OC_NAME_SH;
+ default:
+ return OC_NAME_UNKNOWN;
+ }
+}
+
+static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev,
+ int eqid)
+{
+ int indx;
+
+ for (indx = 0; indx < dev->eq_cnt; indx++) {
+ if (dev->eq_tbl[indx].q.id == eqid)
+ return indx;
+ }
+
+ return -EINVAL;
+}
+
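+/* read the ASIC generation from PCI config space once and cache it */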
+static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
+{
+ if (dev->nic_info.dev_family == 0xF && !dev->asic_id) {
+ pci_read_config_dword(
+ dev->nic_info.pdev,
+ OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id);
+ }
+
+ return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
+ OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
+}
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
index fbac8eb4403..1554cca5712 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -28,7 +28,8 @@
#ifndef __OCRDMA_ABI_H__
#define __OCRDMA_ABI_H__
-#define OCRDMA_ABI_VERSION 1
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
/* user kernel communication data structures. */
struct ocrdma_alloc_ucontext_resp {
@@ -107,9 +108,7 @@ struct ocrdma_create_qp_uresp {
u32 db_sq_offset;
u32 db_rq_offset;
u32 db_shift;
- u64 rsvd1;
- u64 rsvd2;
- u64 rsvd3;
+ u64 rsvd[11];
} __packed;
struct ocrdma_create_srq_uresp {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ee499d94225..d4cc01f10c0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -49,7 +49,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
ah->sgid_index = attr->grh.sgid_index;
- vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+ vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag && (vlan_tag < 0x1000)) {
@@ -64,7 +64,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
eth_sz = sizeof(struct ocrdma_eth_basic);
}
memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
- status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
+ memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
+ status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
@@ -84,6 +85,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
if (vlan_enabled)
ah->av->valid |= OCRDMA_AV_VLAN_VALID;
+ ah->av->valid = cpu_to_le32(ah->av->valid);
return status;
}
@@ -98,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 4ed8235d2d3..3bbf2010a82 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -32,7 +32,6 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_addr.h>
#include "ocrdma.h"
#include "ocrdma_hw.h"
@@ -150,7 +149,7 @@ enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
return IB_QPS_SQE;
case OCRDMA_QPS_ERR:
return IB_QPS_ERR;
- };
+ }
return IB_QPS_ERR;
}
@@ -171,7 +170,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
return OCRDMA_QPS_SQE;
case IB_QPS_ERR:
return OCRDMA_QPS_ERR;
- };
+ }
return OCRDMA_QPS_ERR;
}
@@ -243,6 +242,23 @@ static int ocrdma_get_mbx_errno(u32 status)
return err_num;
}
+char *port_speed_string(struct ocrdma_dev *dev)
+{
+ char *str = "";
+ u16 speeds_supported;
+
+ speeds_supported = dev->phy.fixed_speeds_supported |
+ dev->phy.auto_speeds_supported;
+ if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS)
+ str = "40Gbps ";
+ else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS)
+ str = "10Gbps ";
+ else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS)
+ str = "1Gbps ";
+
+ return str;
+}
+
static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
{
int err_num = -EINVAL;
@@ -332,6 +348,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
return mqe;
}
+static void *ocrdma_alloc_mqe(void)
+{
+ return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
+}
+
static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
{
dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -364,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
}
}
-static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
- int queue_type)
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *q, int queue_type)
{
u8 opcode = 0;
int status;
@@ -444,7 +465,7 @@ mbx_err:
return status;
}
-static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
{
int irq;
@@ -574,6 +595,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev)
if (status)
goto alloc_err;
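+ /* the MQ CQ is bound to EQ0; bump cq_cnt so the interrupt
+ * budget accounts for it
+ */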
+ dev->eq_tbl[0].cq_cnt++;
status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
if (status)
goto mbx_cq_free;
@@ -639,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
{
struct ocrdma_qp *qp = NULL;
struct ocrdma_cq *cq = NULL;
- struct ib_event ib_evt;
+ struct ib_event ib_evt = { 0 };
int cq_event = 0;
int qp_event = 1;
int srq_event = 0;
@@ -664,6 +686,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
case OCRDMA_CQ_OVERRUN_ERROR:
ib_evt.element.cq = &cq->ibcq;
ib_evt.event = IB_EVENT_CQ_ERR;
+ cq_event = 1;
+ qp_event = 0;
break;
case OCRDMA_CQ_QPCAT_ERROR:
ib_evt.element.qp = &qp->ibqp;
@@ -725,6 +749,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
qp->srq->ibsrq.
srq_context);
} else if (dev_event) {
+ pr_err("%s: Fatal event received\n", dev->ibdev.name);
ib_dispatch_event(&ib_evt);
}
@@ -752,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
}
}
-
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
@@ -799,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
ocrdma_process_acqe(dev, cqe);
else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
ocrdma_process_mcqe(dev, cqe);
- else
- pr_err("%s() cqe->compl is not set.\n", __func__);
memset(cqe, 0, sizeof(struct ocrdma_mcqe));
ocrdma_mcq_inc_tail(dev);
}
@@ -858,16 +880,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
BUG();
cq = dev->cq_tbl[cq_idx];
- if (cq == NULL) {
- pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+ if (cq == NULL)
return;
- }
- spin_lock_irqsave(&cq->cq_lock, flags);
- cq->armed = false;
- cq->solicited = false;
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
- ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
if (cq->ibcq.comp_handler) {
spin_lock_irqsave(&cq->comp_handler_lock, flags);
@@ -892,27 +906,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
struct ocrdma_dev *dev = eq->dev;
struct ocrdma_eqe eqe;
struct ocrdma_eqe *ptr;
- u16 eqe_popped = 0;
u16 cq_id;
- while (1) {
+ int budget = eq->cq_cnt;
+
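+ /* bound each pass by the number of CQs attached to this EQ */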
+ do {
ptr = ocrdma_get_eqe(eq);
eqe = *ptr;
ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
break;
- eqe_popped += 1;
+
ptr->id_valid = 0;
+ /* ring the EQ doorbell as soon as the EQE is consumed. */
+ ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
/* check whether its CQE or not. */
if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
ocrdma_cq_handler(dev, cq_id);
}
ocrdma_eq_inc_tail(eq);
- }
- ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
- /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */
- if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
- ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+ /* There can be a stale EQE after the last bound CQ is
+ * destroyed. EQE valid and budget == 0 implies this.
+ */
+ if (budget)
+ budget--;
+
+ } while (budget);
+
+ ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
return IRQ_HANDLED;
}
@@ -949,7 +971,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
{
int status = 0;
u16 cqe_status, ext_status;
- struct ocrdma_mqe *rsp;
+ struct ocrdma_mqe *rsp_mqe;
+ struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
ocrdma_post_mqe(dev, mqe);
@@ -958,23 +981,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
goto mbx_err;
cqe_status = dev->mqe_ctx.cqe_status;
ext_status = dev->mqe_ctx.ext_status;
- rsp = ocrdma_get_mqe_rsp(dev);
- ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
+ rsp_mqe = ocrdma_get_mqe_rsp(dev);
+ ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe)));
+ if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+ OCRDMA_MQE_HDR_EMB_SHIFT)
+ rsp = &mqe->u.rsp;
+
if (cqe_status || ext_status) {
- pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
- __func__,
- (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
- OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
+ pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+ __func__, cqe_status, ext_status);
+ if (rsp) {
+ /* This is for embedded cmds. */
+ pr_err("opcode=0x%x, subsystem=0x%x\n",
+ (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+ OCRDMA_MBX_RSP_OPCODE_SHIFT,
+ (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+ }
status = ocrdma_get_mbx_cqe_errno(cqe_status);
goto mbx_err;
}
- if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
+ /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */
+ if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK))
status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
mbx_err:
mutex_unlock(&dev->mqe_ctx.lock);
return status;
}
+static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
+ void *payload_va)
+{
+ int status = 0;
+ struct ocrdma_mbx_rsp *rsp = payload_va;
+
+ if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+ OCRDMA_MQE_HDR_EMB_SHIFT)
+ BUG();
+
+ status = ocrdma_mbx_cmd(dev, mqe);
+ if (!status) {
+ /* For non embedded, only CQE failures are handled in
+ * ocrdma_mbx_cmd. We need to check for RSP errors.
+ */
+ if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK)
+ status = ocrdma_get_mbx_errno(rsp->status);
+ }
+
+ if (status)
+ pr_err("opcode=0x%x, subsystem=0x%x\n",
+ (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+ OCRDMA_MBX_RSP_OPCODE_SHIFT,
+ (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+ return status;
+}
+
static void ocrdma_get_attr(struct ocrdma_dev *dev,
struct ocrdma_dev_attr *attr,
struct ocrdma_mbx_query_config *rsp)
@@ -985,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_qp =
(rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+ attr->max_srq =
+ (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
attr->max_send_sge = ((rsp->max_write_send_sge &
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
@@ -1000,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
- attr->max_srq =
- (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
- OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1015,6 +1076,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+ attr->max_mw = rsp->max_mw;
attr->max_mr = rsp->max_mr;
attr->max_mr_size = ~0ull;
attr->max_fmr = 0;
@@ -1036,7 +1098,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_inline_data =
attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
sizeof(struct ocrdma_sge));
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
attr->ird = 1;
attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
@@ -1110,6 +1172,96 @@ mbx_err:
return status;
}
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
+{
+ struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
+ struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
+ struct ocrdma_rdma_stats_resp *old_stats = NULL;
+ int status;
+
+ old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+ if (old_stats == NULL)
+ return -ENOMEM;
+
+ memset(mqe, 0, sizeof(*mqe));
+ mqe->hdr.pyld_len = dev->stats_mem.size;
+ mqe->hdr.spcl_sge_cnt_emb |=
+ (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff);
+ mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa);
+ mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size;
+
+ /* Cache the old stats; the same DMA buffer carries both the
+ * request and the response, so a failed mailbox command must not
+ * clobber the last good counters.
+ */
+ memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp));
+ memset(req, 0, dev->stats_mem.size);
+
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)req,
+ OCRDMA_CMD_GET_RDMA_STATS,
+ OCRDMA_SUBSYS_ROCE,
+ dev->stats_mem.size);
+ if (reset)
+ req->reset_stats = reset;
+
+ status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va);
+ if (status)
+ /* Copy from cache, if mbox fails */
+ memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp));
+ else
+ ocrdma_le32_to_cpu(req, dev->stats_mem.size);
+
+ kfree(old_stats);
+ return status;
+}
+
+static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_dma_mem dma;
+ struct ocrdma_mqe *mqe;
+ struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
+ struct mgmt_hba_attribs *hba_attribs;
+
+ mqe = ocrdma_alloc_mqe();
+ if (!mqe)
+ return status;
+ memset(mqe, 0, sizeof(*mqe));
+
+ dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
+ dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev,
+ dma.size, &dma.pa, GFP_KERNEL);
+ if (!dma.va)
+ goto free_mqe;
+
+ mqe->hdr.pyld_len = dma.size;
+ mqe->hdr.spcl_sge_cnt_emb |=
+ (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff);
+ mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
+ mqe->u.nonemb_req.sge[0].len = dma.size;
+
+ memset(dma.va, 0, dma.size);
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
+ OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
+ OCRDMA_SUBSYS_COMMON,
+ dma.size);
+
+ status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va);
+ if (!status) {
+ ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
+ hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
+
+ dev->hba_port_num = hba_attribs->phy_port;
+ strncpy(dev->model_number,
+ hba_attribs->controller_model_number, 31);
+ }
+ dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
+free_mqe:
+ kfree(mqe);
+ return status;
+}
+
static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
{
int status = -ENOMEM;
@@ -1157,6 +1309,35 @@ mbx_err:
return status;
}
+static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mqe *cmd;
+ struct ocrdma_get_phy_info_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON,
+ sizeof(*cmd));
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
+ dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.auto_speeds_supported =
+ le16_to_cpu(rsp->auto_speeds_supported);
+ dev->phy.fixed_speeds_supported =
+ le16_to_cpu(rsp->fixed_speeds_supported);
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
{
int status = -ENOMEM;
@@ -1226,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
{
- int i ;
+ int i;
int status = 0;
int max_ah;
struct ocrdma_create_ah_tbl *cmd;
@@ -1357,12 +1538,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
int i;
mutex_lock(&dev->dev_lock);
- for (i = 0; i < dev->eq_cnt; i++) {
- if (dev->eq_tbl[i].q.id != eq_id)
- continue;
- dev->eq_tbl[i].cq_cnt -= 1;
- break;
- }
+ i = ocrdma_get_eq_table_index(dev, eq_id);
+ if (i == -EINVAL)
+ BUG();
+ dev->eq_tbl[i].cq_cnt -= 1;
mutex_unlock(&dev->dev_lock);
}
@@ -1380,7 +1559,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
__func__, dev->id, dev->attr.max_cqe, entries);
return -EINVAL;
}
- if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
+ if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
return -EINVAL;
if (dpp_cq) {
@@ -1417,6 +1596,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
cq->eqn = ocrdma_bind_eq(dev);
cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
cqe_count = cq->len / cqe_size;
+ cq->cqe_cnt = cqe_count;
if (cqe_count > 1024) {
/* Set cnt to 3 to indicate more than 1024 cq entries */
cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
@@ -1439,7 +1619,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
}
/* shared eq between all the consumer cqs. */
cmd->cmd.eqn = cq->eqn;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
if (dpp_cq)
cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
OCRDMA_CREATE_CQ_TYPE_SHIFT;
@@ -1484,12 +1664,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
(cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
OCRDMA_DESTROY_CQ_QID_MASK;
- ocrdma_unbind_eq(dev, cq->eqn);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
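+ /* unbind the EQ and free the queue memory even if the mailbox
+ * command failed
+ */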
+ ocrdma_unbind_eq(dev, cq->eqn);
dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
-mbx_err:
kfree(cmd);
return status;
}
@@ -1783,7 +1960,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
u32 max_sges = attrs->cap.max_send_sge;
/* QP1 may exceed 127 */
- max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+ max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
dev->attr.max_wqe);
status = ocrdma_build_q_conf(&max_wqe_allocated,
@@ -1982,7 +2159,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
break;
default:
return -EINVAL;
- };
+ }
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
if (!cmd)
@@ -2029,8 +2206,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
qp->rq_cq = cq;
- if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
- (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
+ if (pd->dpp_enabled && pd->num_dpp_qp) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
}
@@ -2076,23 +2252,6 @@ mbx_err:
return status;
}
-int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
- u8 *mac_addr)
-{
- struct in6_addr in6;
-
- memcpy(&in6, dgid, sizeof in6);
- if (rdma_is_multicast_addr(&in6)) {
- rdma_get_mcast_mac(&in6, mac_addr);
- } else if (rdma_link_local_addr(&in6)) {
- rdma_get_ll_mac(&in6, mac_addr);
- } else {
- pr_err("%s() fail to resolve mac_addr.\n", __func__);
- return -EINVAL;
- }
- return 0;
-}
-
static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
struct ib_qp_attr *attrs)
@@ -2116,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
status = ocrdma_query_gid(&qp->dev->ibdev, 1,
- ah_attr->grh.sgid_index, &sgid);
+ ah_attr->grh.sgid_index, &sgid);
if (status)
return status;
@@ -2126,14 +2285,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
qp->sgid_idx = ah_attr->grh.sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
- ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
+ ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]);
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
/* convert them to LE format. */
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- vlan_id = rdma_get_vlan_id(&sgid);
+ vlan_id = ah_attr->vlan_id;
if (vlan_id && (vlan_id < 0x1000)) {
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
@@ -2144,8 +2303,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps)
+ struct ib_qp_attr *attrs, int attr_mask)
{
int status = 0;
@@ -2250,8 +2408,7 @@ pmtu_err:
}
int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps)
+ struct ib_qp_attr *attrs, int attr_mask)
{
int status = -ENOMEM;
struct ocrdma_modify_qp *cmd;
@@ -2274,7 +2431,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
OCRDMA_QP_PARAMS_STATE_MASK;
}
- status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
+ status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask);
if (status)
goto mbx_err;
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
@@ -2505,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev)
for (i = 0; i < num_eq; i++) {
status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
- OCRDMA_EQ_LEN);
+ OCRDMA_EQ_LEN);
if (status) {
status = -EINVAL;
break;
@@ -2550,6 +2707,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
status = ocrdma_mbx_create_ah_tbl(dev);
if (status)
goto conf_err;
+ status = ocrdma_mbx_get_phy_info(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_get_ctrl_attribs(dev);
+ if (status)
+ goto conf_err;
+
return 0;
conf_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index f2a89d4cc7c..e513f729314 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -94,7 +94,6 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
-int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
@@ -113,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
u16 *dpp_credit_lmt);
int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps);
+ struct ib_qp_attr *attrs, int attr_mask);
int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
struct ocrdma_qp_params *param);
int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
@@ -133,5 +131,8 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
void ocrdma_flush_qp(struct ocrdma_qp *);
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
+char *port_speed_string(struct ocrdma_dev *dev);
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 56e004940f1..7c504e07974 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -39,10 +39,11 @@
#include "ocrdma_ah.h"
#include "be_roce.h"
#include "ocrdma_hw.h"
+#include "ocrdma_stats.h"
#include "ocrdma_abi.h"
-MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
-MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
+MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
+MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("GPL");
@@ -67,46 +68,24 @@ void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
guid[7] = mac_addr[5];
}
-static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
-{
- sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- sgid->raw[8] = mac_addr[0] ^ 2;
- sgid->raw[9] = mac_addr[1];
- sgid->raw[10] = mac_addr[2];
- if (is_vlan) {
- sgid->raw[11] = vlan_id >> 8;
- sgid->raw[12] = vlan_id & 0xff;
- } else {
- sgid->raw[11] = 0xff;
- sgid->raw[12] = 0xfe;
- }
- sgid->raw[13] = mac_addr[3];
- sgid->raw[14] = mac_addr[4];
- sgid->raw[15] = mac_addr[5];
-}
-
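For reference, the GID layout the removed helper synthesized, worked through for one MAC (illustrative values):

    /* MAC 00:11:22:33:44:55, no VLAN:
     *   raw[8] = 00 ^ 2 = 02, raw[11..12] = ff:fe
     *   GID    = fe80::0211:22ff:fe33:4455
     * With VLAN id 0x0abc, raw[11..12] carry 0a:bc instead of ff:fe.
     * The rewrite below drops this synthesis; GIDs now arrive already
     * formed from the inetaddr/inet6addr notifiers.
     */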
-static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
+static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid)
{
int i;
- union ib_gid new_sgid;
unsigned long flags;
memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
- ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
for (i = 0; i < OCRDMA_MAX_SGID; i++) {
if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
sizeof(union ib_gid))) {
/* found free entry */
- memcpy(&dev->sgid_tbl[i], &new_sgid,
+ memcpy(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid));
spin_unlock_irqrestore(&dev->sgid_lock, flags);
return true;
- } else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
+ } else if (!memcmp(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid))) {
/* entry already present, no addition is required. */
spin_unlock_irqrestore(&dev->sgid_lock, flags);
@@ -117,20 +96,17 @@ static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return false;
}
-static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
+static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid)
{
int found = false;
int i;
- union ib_gid sgid;
unsigned long flags;
- ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
/* first is default sgid, which cannot be deleted. */
for (i = 1; i < OCRDMA_MAX_SGID; i++) {
- if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
+ if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) {
/* found matching entry */
memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
found = true;
@@ -141,75 +117,18 @@ static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return found;
}
-static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
-{
- /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
- union ib_gid *sgid = &dev->sgid_tbl[0];
-
- sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- ocrdma_get_guid(dev, &sgid->raw[8]);
-}
-
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
- struct net_device *netdev, *tmp;
- u16 vlan_id;
- bool is_vlan;
-
- netdev = dev->nic_info.netdev;
-
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
- if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
- if (!netif_running(tmp) || !netif_oper_up(tmp))
- continue;
- if (netdev != tmp) {
- vlan_id = vlan_dev_vlan_id(tmp);
- is_vlan = true;
- } else {
- is_vlan = false;
- vlan_id = 0;
- tmp = netdev;
- }
- ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
- }
- }
- rcu_read_unlock();
-}
-#else
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
-
-}
-#endif /* VLAN */
-
-static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
-{
- ocrdma_add_default_sgid(dev);
- ocrdma_add_vlan_sgids(dev);
- return 0;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-static int ocrdma_inet6addr_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
+static int ocrdma_addr_event(unsigned long event, struct net_device *netdev,
+ union ib_gid *gid)
{
- struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *netdev = ifa->idev->dev;
struct ib_event gid_event;
struct ocrdma_dev *dev;
bool found = false;
bool updated = false;
bool is_vlan = false;
- u16 vid = 0;
is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
- if (is_vlan) {
- vid = vlan_dev_vlan_id(netdev);
- netdev = vlan_dev_real_dev(netdev);
- }
+ if (is_vlan)
+ netdev = rdma_vlan_dev_real_dev(netdev);
rcu_read_lock();
list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
@@ -222,16 +141,14 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
if (!found)
return NOTIFY_DONE;
- if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
- return NOTIFY_DONE;
mutex_lock(&dev->dev_lock);
switch (event) {
case NETDEV_UP:
- updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
+ updated = ocrdma_add_sgid(dev, gid);
break;
case NETDEV_DOWN:
- updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
+ updated = ocrdma_del_sgid(dev, gid);
break;
default:
break;
@@ -247,6 +164,32 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
return NOTIFY_OK;
}
+static int ocrdma_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+ return ocrdma_addr_event(event, netdev, &gid);
+}
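A worked example of the mapping ipv6_addr_set_v4mapped() performs above (illustrative address):

    /* IPv4 192.168.1.10 -> v4-mapped GID ::ffff:192.168.1.10,
     * i.e. raw bytes 00 .. 00 ff ff c0 a8 01 0a: bytes 10-11 are
     * 0xff and the IPv4 address occupies bytes 12-15.
     */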
+
+static struct notifier_block ocrdma_inetaddr_notifier = {
+ .notifier_call = ocrdma_inetaddr_event
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static int ocrdma_inet6addr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *netdev = ifa->idev->dev;
+ return ocrdma_addr_event(event, netdev, gid);
+}
+
static struct notifier_block ocrdma_inet6addr_notifier = {
.notifier_call = ocrdma_inet6addr_event
};
@@ -344,7 +287,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.process_mad = ocrdma_process_mad;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
OCRDMA_UVERBS(CREATE_SRQ) |
OCRDMA_UVERBS(MODIFY_SRQ) |
@@ -396,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
kfree(dev->sgid_tbl);
}
+/* OCRDMA sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+
+static struct device_attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver
+};
+
+static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
+}
+
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
- int status = 0;
+ int status = 0, i;
struct ocrdma_dev *dev;
dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -423,19 +399,29 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
- status = ocrdma_build_sgid_tbl(dev);
- if (status)
- goto alloc_err;
-
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
+ goto sysfs_err;
spin_lock(&ocrdma_devlist_lock);
list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
spin_unlock(&ocrdma_devlist_lock);
+ /* Init stats */
+ ocrdma_add_port_stats(dev);
+
+ pr_info("%s %s: %s \"%s\" port %d\n",
+ dev_name(&dev->nic_info.pdev->dev), hca_name(dev),
+ port_speed_string(dev), dev->model_number,
+ dev->hba_port_num);
+ pr_info("%s ocrdma%d driver loaded successfully\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
return dev;
+sysfs_err:
+ ocrdma_remove_sysfiles(dev);
alloc_err:
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
@@ -452,9 +438,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
{
struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
- ocrdma_free_resources(dev);
- ocrdma_cleanup_hw(dev);
-
idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
ib_dealloc_device(&dev->ibdev);
@@ -465,11 +448,18 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
/* first unregister with stack to stop all the active traffic
* of the registered clients.
*/
+ ocrdma_rem_port_stats(dev);
+ ocrdma_remove_sysfiles(dev);
+
ib_unregister_device(&dev->ibdev);
spin_lock(&ocrdma_devlist_lock);
list_del_rcu(&dev->entry);
spin_unlock(&ocrdma_devlist_lock);
+
+ ocrdma_free_resources(dev);
+ ocrdma_cleanup_hw(dev);
+
call_rcu(&dev->rcu, ocrdma_remove_free);
}
@@ -498,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
cur_qp = dev->qp_tbl;
for (i = 0; i < OCRDMA_MAX_QP; i++) {
qp = cur_qp[i];
- if (qp) {
+ if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
/* change the QP state to ERROR */
_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
@@ -531,7 +521,7 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
case BE_DEV_DOWN:
ocrdma_close(dev);
break;
- };
+ }
}
static struct ocrdma_driver ocrdma_drv = {
@@ -539,6 +529,7 @@ static struct ocrdma_driver ocrdma_drv = {
.add = ocrdma_add,
.remove = ocrdma_remove,
.state_change_handler = ocrdma_event_handler,
+ .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION,
};
static void ocrdma_unregister_inet6addr_notifier(void)
@@ -548,20 +539,37 @@ static void ocrdma_unregister_inet6addr_notifier(void)
#endif
}
+static void ocrdma_unregister_inetaddr_notifier(void)
+{
+ unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+}
+
static int __init ocrdma_init_module(void)
{
int status;
+ ocrdma_init_debugfs();
+
+ status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+ if (status)
+ return status;
+
#if IS_ENABLED(CONFIG_IPV6)
status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
if (status)
- return status;
+ goto err_notifier6;
#endif
status = be_roce_register_driver(&ocrdma_drv);
if (status)
- ocrdma_unregister_inet6addr_notifier();
+ goto err_be_reg;
+ return 0;
+
+err_be_reg:
+ ocrdma_unregister_inet6addr_notifier();
+err_notifier6:
+ ocrdma_unregister_inetaddr_notifier();
return status;
}
@@ -569,6 +577,8 @@ static void __exit ocrdma_exit_module(void)
{
be_roce_unregister_driver(&ocrdma_drv);
ocrdma_unregister_inet6addr_notifier();
+ ocrdma_unregister_inetaddr_notifier();
+ ocrdma_rem_debugfs();
}
module_init(ocrdma_init_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 9f9570ec3c2..96c9ee602ba 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -30,8 +30,16 @@
#define Bit(_b) (1 << (_b))
-#define OCRDMA_GEN1_FAMILY 0xB
-#define OCRDMA_GEN2_FAMILY 0x2
+enum {
+ OCRDMA_ASIC_GEN_SKH_R = 0x04,
+ OCRDMA_ASIC_GEN_LANCER = 0x0B
+};
+
+enum {
+ OCRDMA_ASIC_REV_A0 = 0x00,
+ OCRDMA_ASIC_REV_B0 = 0x10,
+ OCRDMA_ASIC_REV_C0 = 0x20
+};
#define OCRDMA_SUBSYS_ROCE 10
enum {
@@ -64,6 +72,7 @@ enum {
OCRDMA_CMD_ATTACH_MCAST,
OCRDMA_CMD_DETACH_MCAST,
+ OCRDMA_CMD_GET_RDMA_STATS,
OCRDMA_CMD_MAX
};
@@ -74,12 +83,14 @@ enum {
OCRDMA_CMD_CREATE_CQ = 12,
OCRDMA_CMD_CREATE_EQ = 13,
OCRDMA_CMD_CREATE_MQ = 21,
+ OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32,
OCRDMA_CMD_GET_FW_VER = 35,
OCRDMA_CMD_DELETE_MQ = 53,
OCRDMA_CMD_DELETE_CQ = 54,
OCRDMA_CMD_DELETE_EQ = 55,
OCRDMA_CMD_GET_FW_CONFIG = 58,
- OCRDMA_CMD_CREATE_MQ_EXT = 90
+ OCRDMA_CMD_CREATE_MQ_EXT = 90,
+ OCRDMA_CMD_PHY_DETAILS = 102
};
enum {
@@ -103,7 +114,10 @@ enum {
OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET,
OCRDMA_DB_CQ_OFFSET = 0x120,
OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET,
- OCRDMA_DB_MQ_OFFSET = 0x140
+ OCRDMA_DB_MQ_OFFSET = 0x140,
+
+ OCRDMA_DB_SQ_SHIFT = 16,
+ OCRDMA_DB_RQ_SHIFT = 24
};
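The two new shifts define the doorbell word format used later by ocrdma_ring_sq_db()/ocrdma_ring_rq_db(); for example, per those functions:

    /* SQ doorbell, qp->sq.dbid == 5, one WQE posted:
     *   val = 5 | (1 << OCRDMA_DB_SQ_SHIFT) = 0x00010005
     * RQ doorbell, qp->rq.dbid == 5:
     *   val = 5 | (1 << OCRDMA_DB_RQ_SHIFT) = 0x01000005
     */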
#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
@@ -138,6 +152,10 @@ enum {
#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
#define OCRDMA_MAX_Q_PAGES (8)
+#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C
+#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF
+#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00
+#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08
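ocrdma_get_asic_type(), used throughout this series, is not shown in these hunks. Given the masks above, a sketch of the likely decode -- assuming the driver caches the register read from OCRDMA_SLI_ASIC_ID_OFFSET in a dev->asic_id field (name hypothetical) -- is:

    static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
    {
            return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
                            OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
    }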
/*
# 0: 4K Bytes
# 1: 8K Bytes
@@ -562,6 +580,30 @@ enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+struct ocrdma_get_phy_info_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u16 phy_type;
+ u16 interface_type;
+ u32 misc_params;
+ u16 ext_phy_details;
+ u16 rsvd;
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u32 future_use[2];
+};
+
+enum {
+ OCRDMA_PHY_SPEED_ZERO = 0x0,
+ OCRDMA_PHY_SPEED_10MBPS = 0x1,
+ OCRDMA_PHY_SPEED_100MBPS = 0x2,
+ OCRDMA_PHY_SPEED_1GBPS = 0x4,
+ OCRDMA_PHY_SPEED_10GBPS = 0x8,
+ OCRDMA_PHY_SPEED_40GBPS = 0x20
+};
+
+
struct ocrdma_get_link_speed_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
@@ -590,7 +632,7 @@ enum {
enum {
OCRDMA_CREATE_CQ_VER2 = 2,
- OCRDMA_CREATE_CQ_VER3 = 3,
+ OCRDMA_CREATE_CQ_VER3 = 3,
OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
@@ -1050,6 +1092,7 @@ enum {
OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
};
+
struct ocrdma_modify_qp_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
@@ -1062,8 +1105,8 @@ struct ocrdma_query_qp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
-#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
-#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
+#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
u32 qp_id;
};
@@ -1694,7 +1737,7 @@ struct ocrdma_grh {
u16 rsvd;
} __packed;
-#define OCRDMA_AV_VALID Bit(0)
+#define OCRDMA_AV_VALID Bit(7)
#define OCRDMA_AV_VLAN_VALID Bit(1)
struct ocrdma_av {
@@ -1703,4 +1746,208 @@ struct ocrdma_av {
u32 valid;
} __packed;
+struct ocrdma_rsrc_stats {
+ u32 dpp_pds;
+ u32 non_dpp_pds;
+ u32 rc_dpp_qps;
+ u32 uc_dpp_qps;
+ u32 ud_dpp_qps;
+ u32 rc_non_dpp_qps;
+ u32 rsvd;
+ u32 uc_non_dpp_qps;
+ u32 ud_non_dpp_qps;
+ u32 rsvd1;
+ u32 srqs;
+ u32 rbqs;
+ u32 r64K_nsmr;
+ u32 r64K_to_2M_nsmr;
+ u32 r2M_to_44M_nsmr;
+ u32 r44M_to_1G_nsmr;
+ u32 r1G_to_4G_nsmr;
+ u32 nsmr_count_4G_to_32G;
+ u32 r32G_to_64G_nsmr;
+ u32 r64G_to_128G_nsmr;
+ u32 r128G_to_higher_nsmr;
+ u32 embedded_nsmr;
+ u32 frmr;
+ u32 prefetch_qps;
+ u32 ondemand_qps;
+ u32 phy_mr;
+ u32 mw;
+ u32 rsvd2[7];
+};
+
+struct ocrdma_db_err_stats {
+ u32 sq_doorbell_errors;
+ u32 cq_doorbell_errors;
+ u32 rq_srq_doorbell_errors;
+ u32 cq_overflow_errors;
+ u32 rsvd[4];
+};
+
+struct ocrdma_wqe_stats {
+ u32 large_send_rc_wqes_lo;
+ u32 large_send_rc_wqes_hi;
+ u32 large_write_rc_wqes_lo;
+ u32 large_write_rc_wqes_hi;
+ u32 rsvd[4];
+ u32 read_wqes_lo;
+ u32 read_wqes_hi;
+ u32 frmr_wqes_lo;
+ u32 frmr_wqes_hi;
+ u32 mw_bind_wqes_lo;
+ u32 mw_bind_wqes_hi;
+ u32 invalidate_wqes_lo;
+ u32 invalidate_wqes_hi;
+ u32 rsvd1[2];
+ u32 dpp_wqe_drops;
+ u32 rsvd2[5];
+};
+
+struct ocrdma_tx_stats {
+ u32 send_pkts_lo;
+ u32 send_pkts_hi;
+ u32 write_pkts_lo;
+ u32 write_pkts_hi;
+ u32 read_pkts_lo;
+ u32 read_pkts_hi;
+ u32 read_rsp_pkts_lo;
+ u32 read_rsp_pkts_hi;
+ u32 ack_pkts_lo;
+ u32 ack_pkts_hi;
+ u32 send_bytes_lo;
+ u32 send_bytes_hi;
+ u32 write_bytes_lo;
+ u32 write_bytes_hi;
+ u32 read_req_bytes_lo;
+ u32 read_req_bytes_hi;
+ u32 read_rsp_bytes_lo;
+ u32 read_rsp_bytes_hi;
+ u32 ack_timeouts;
+ u32 rsvd[5];
+};
+
+
+struct ocrdma_tx_qp_err_stats {
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 retry_count_exceeded_errors;
+ u32 rnr_retry_count_exceeded_errors;
+ u32 rsvd[3];
+};
+
+struct ocrdma_rx_stats {
+ u32 roce_frame_bytes_lo;
+ u32 roce_frame_bytes_hi;
+ u32 roce_frame_icrc_drops;
+ u32 roce_frame_payload_len_drops;
+ u32 ud_drops;
+ u32 qp1_drops;
+ u32 psn_error_request_packets;
+ u32 psn_error_resp_packets;
+ u32 rnr_nak_timeouts;
+ u32 rnr_nak_receives;
+ u32 roce_frame_rxmt_drops;
+ u32 nak_count_psn_sequence_errors;
+ u32 rc_drop_count_lookup_errors;
+ u32 rq_rnr_naks;
+ u32 srq_rnr_naks;
+ u32 roce_frames_lo;
+ u32 roce_frames_hi;
+ u32 rsvd;
+};
+
+struct ocrdma_rx_qp_err_stats {
+ u32 nak_invalid_requst_errors;
+ u32 nak_remote_operation_errors;
+ u32 nak_count_remote_access_errors;
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 rsvd[2];
+};
+
+struct ocrdma_tx_dbg_stats {
+ u32 data[100];
+};
+
+struct ocrdma_rx_dbg_stats {
+ u32 data[200];
+};
+
+struct ocrdma_rdma_stats_req {
+ struct ocrdma_mbx_hdr hdr;
+ u8 reset_stats;
+ u8 rsvd[3];
+} __packed;
+
+struct ocrdma_rdma_stats_resp {
+ struct ocrdma_mbx_hdr hdr;
+ struct ocrdma_rsrc_stats act_rsrc_stats;
+ struct ocrdma_rsrc_stats th_rsrc_stats;
+ struct ocrdma_db_err_stats db_err_stats;
+ struct ocrdma_wqe_stats wqe_stats;
+ struct ocrdma_tx_stats tx_stats;
+ struct ocrdma_tx_qp_err_stats tx_qp_err_stats;
+ struct ocrdma_rx_stats rx_stats;
+ struct ocrdma_rx_qp_err_stats rx_qp_err_stats;
+ struct ocrdma_tx_dbg_stats tx_dbg_stats;
+ struct ocrdma_rx_dbg_stats rx_dbg_stats;
+} __packed;
+
+
+struct mgmt_hba_attribs {
+ u8 flashrom_version_string[32];
+ u8 manufacturer_name[32];
+ u32 supported_modes;
+ u32 rsvd0[3];
+ u8 ncsi_ver_string[12];
+ u32 default_extended_timeout;
+ u8 controller_model_number[32];
+ u8 controller_description[64];
+ u8 controller_serial_number[32];
+ u8 ip_version_string[32];
+ u8 firmware_version_string[32];
+ u8 bios_version_string[32];
+ u8 redboot_version_string[32];
+ u8 driver_version_string[32];
+ u8 fw_on_flash_version_string[32];
+ u32 functionalities_supported;
+ u16 max_cdblength;
+ u8 asic_revision;
+ u8 generational_guid[16];
+ u8 hba_port_count;
+ u16 default_link_down_timeout;
+ u8 iscsi_ver_min_max;
+ u8 multifunction_device;
+ u8 cache_valid;
+ u8 hba_status;
+ u8 max_domains_supported;
+ u8 phy_port;
+ u32 firmware_post_status;
+ u32 hba_mtu[8];
+ u32 rsvd1[4];
+};
+
+struct mgmt_controller_attrib {
+ struct mgmt_hba_attribs hba_attribs;
+ u16 pci_vendor_id;
+ u16 pci_device_id;
+ u16 pci_sub_vendor_id;
+ u16 pci_sub_system_id;
+ u8 pci_bus_number;
+ u8 pci_device_number;
+ u8 pci_function_number;
+ u8 interface_type;
+ u64 unique_identifier;
+ u32 rsvd0[5];
+};
+
+struct ocrdma_get_ctrl_attribs_rsp {
+ struct ocrdma_mbx_hdr hdr;
+ struct mgmt_controller_attrib ctrl_attribs;
+};
+
+
#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
new file mode 100644
index 00000000000..41a9aec9998
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -0,0 +1,616 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <rdma/ib_addr.h>
+#include "ocrdma_stats.h"
+
+static struct dentry *ocrdma_dbgfs_dir;
+
+static int ocrdma_add_stat(char *start, char *pcur,
+ char *name, u64 count)
+{
+ char buff[128] = {0};
+ int cpy_len = 0;
+
+ snprintf(buff, 128, "%s: %llu\n", name, count);
+ cpy_len = strlen(buff);
+
+ if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) {
+ pr_err("%s: No space in stats buff\n", __func__);
+ return 0;
+ }
+
+ memcpy(pcur, buff, cpy_len);
+ return cpy_len;
+}
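ocrdma_add_stat() returns the byte count it appended (0 when the buffer would overflow), so the formatters below walk a cursor across the debugfs buffer. A minimal usage sketch:

    char *start = dev->stats_mem.debugfs_mem, *pcur = start;

    pcur += ocrdma_add_stat(start, pcur, "active_srqs", (u64)4);
    pcur += ocrdma_add_stat(start, pcur, "active_mw", (u64)0);
    /* start now holds "active_srqs: 4\nactive_mw: 0\n" */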
+
+static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+{
+ struct stats_mem *mem = &dev->stats_mem;
+
+ /* Alloc mbox command mem */
+ mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
+ sizeof(struct ocrdma_rdma_stats_resp));
+
+ mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va) {
+ pr_err("%s: stats mbox allocation failed\n", __func__);
+ return false;
+ }
+
+ memset(mem->va, 0, mem->size);
+
+ /* Alloc debugfs mem */
+ mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
+ if (!mem->debugfs_mem) {
+ pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+ return false;
+ }
+
+ return true;
+}
+
+static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+{
+ struct stats_mem *mem = &dev->stats_mem;
+
+ if (mem->va)
+ dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
+ mem->va, mem->pa);
+ kfree(mem->debugfs_mem);
+}
+
+static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds",
+ (u64)rsrc_stats->dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds",
+ (u64)rsrc_stats->non_dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps",
+ (u64)rsrc_stats->rc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps",
+ (u64)rsrc_stats->uc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps",
+ (u64)rsrc_stats->ud_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps",
+ (u64)rsrc_stats->rc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps",
+ (u64)rsrc_stats->uc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps",
+ (u64)rsrc_stats->ud_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_srqs",
+ (u64)rsrc_stats->srqs);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rbqs",
+ (u64)rsrc_stats->rbqs);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr",
+ (u64)rsrc_stats->r64K_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr",
+ (u64)rsrc_stats->r64K_to_2M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr",
+ (u64)rsrc_stats->r2M_to_44M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr",
+ (u64)rsrc_stats->r44M_to_1G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr",
+ (u64)rsrc_stats->r1G_to_4G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G",
+ (u64)rsrc_stats->nsmr_count_4G_to_32G);
+ pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr",
+ (u64)rsrc_stats->r32G_to_64G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr",
+ (u64)rsrc_stats->r64G_to_128G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr",
+ (u64)rsrc_stats->r128G_to_higher_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr",
+ (u64)rsrc_stats->embedded_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_frmr",
+ (u64)rsrc_stats->frmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps",
+ (u64)rsrc_stats->prefetch_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps",
+ (u64)rsrc_stats->ondemand_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr",
+ (u64)rsrc_stats->phy_mr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_mw",
+ (u64)rsrc_stats->mw);
+
+ /* Print the threshold stats */
+ rsrc_stats = &rdma_stats->th_rsrc_stats;
+
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds",
+ (u64)rsrc_stats->dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds",
+ (u64)rsrc_stats->non_dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps",
+ (u64)rsrc_stats->rc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps",
+ (u64)rsrc_stats->uc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps",
+ (u64)rsrc_stats->ud_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps",
+ (u64)rsrc_stats->rc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps",
+ (u64)rsrc_stats->uc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps",
+ (u64)rsrc_stats->ud_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs",
+ (u64)rsrc_stats->srqs);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs",
+ (u64)rsrc_stats->rbqs);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr",
+ (u64)rsrc_stats->r64K_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr",
+ (u64)rsrc_stats->r64K_to_2M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr",
+ (u64)rsrc_stats->r2M_to_44M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr",
+ (u64)rsrc_stats->r44M_to_1G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr",
+ (u64)rsrc_stats->r1G_to_4G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G",
+ (u64)rsrc_stats->nsmr_count_4G_to_32G);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr",
+ (u64)rsrc_stats->r32G_to_64G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr",
+ (u64)rsrc_stats->r64G_to_128G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr",
+ (u64)rsrc_stats->r128G_to_higher_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr",
+ (u64)rsrc_stats->embedded_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr",
+ (u64)rsrc_stats->frmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps",
+ (u64)rsrc_stats->prefetch_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps",
+ (u64)rsrc_stats->ondemand_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr",
+ (u64)rsrc_stats->phy_mr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_mw",
+ (u64)rsrc_stats->mw);
+ return stats;
+}
+
+static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat
+ (stats, pcur, "roce_frame_bytes",
+ convert_to_64bit(rx_stats->roce_frame_bytes_lo,
+ rx_stats->roce_frame_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops",
+ (u64)rx_stats->roce_frame_icrc_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops",
+ (u64)rx_stats->roce_frame_payload_len_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "ud_drops",
+ (u64)rx_stats->ud_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "qp1_drops",
+ (u64)rx_stats->qp1_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets",
+ (u64)rx_stats->psn_error_request_packets);
+ pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets",
+ (u64)rx_stats->psn_error_resp_packets);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts",
+ (u64)rx_stats->rnr_nak_timeouts);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives",
+ (u64)rx_stats->rnr_nak_receives);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops",
+ (u64)rx_stats->roce_frame_rxmt_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors",
+ (u64)rx_stats->nak_count_psn_sequence_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors",
+ (u64)rx_stats->rc_drop_count_lookup_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks",
+ (u64)rx_stats->rq_rnr_naks);
+ pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks",
+ (u64)rx_stats->srq_rnr_naks);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frames",
+ convert_to_64bit(rx_stats->roce_frames_lo,
+ rx_stats->roce_frames_hi));
+
+ return stats;
+}
+
+static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "send_pkts",
+ convert_to_64bit(tx_stats->send_pkts_lo,
+ tx_stats->send_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "write_pkts",
+ convert_to_64bit(tx_stats->write_pkts_lo,
+ tx_stats->write_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_pkts",
+ convert_to_64bit(tx_stats->read_pkts_lo,
+ tx_stats->read_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts",
+ convert_to_64bit(tx_stats->read_rsp_pkts_lo,
+ tx_stats->read_rsp_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "ack_pkts",
+ convert_to_64bit(tx_stats->ack_pkts_lo,
+ tx_stats->ack_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "send_bytes",
+ convert_to_64bit(tx_stats->send_bytes_lo,
+ tx_stats->send_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "write_bytes",
+ convert_to_64bit(tx_stats->write_bytes_lo,
+ tx_stats->write_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes",
+ convert_to_64bit(tx_stats->read_req_bytes_lo,
+ tx_stats->read_req_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes",
+ convert_to_64bit(tx_stats->read_rsp_bytes_lo,
+ tx_stats->read_rsp_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts",
+ (u64)tx_stats->ack_timeouts);
+
+ return stats;
+}
+
+static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes",
+ convert_to_64bit(wqe_stats->large_send_rc_wqes_lo,
+ wqe_stats->large_send_rc_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes",
+ convert_to_64bit(wqe_stats->large_write_rc_wqes_lo,
+ wqe_stats->large_write_rc_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_wqes",
+ convert_to_64bit(wqe_stats->read_wqes_lo,
+ wqe_stats->read_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes",
+ convert_to_64bit(wqe_stats->frmr_wqes_lo,
+ wqe_stats->frmr_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes",
+ convert_to_64bit(wqe_stats->mw_bind_wqes_lo,
+ wqe_stats->mw_bind_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes",
+ convert_to_64bit(wqe_stats->invalidate_wqes_lo,
+ wqe_stats->invalidate_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops",
+ (u64)wqe_stats->dpp_wqe_drops);
+ return stats;
+}
+
+static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors",
+ (u64)db_err_stats->sq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors",
+ (u64)db_err_stats->cq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors",
+ (u64)db_err_stats->rq_srq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors",
+ (u64)db_err_stats->cq_overflow_errors);
+ return stats;
+}
+
+static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_qp_err_stats *rx_qp_err_stats =
+ &rdma_stats->rx_qp_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
+ (u64)rx_qp_err_stats->nak_invalid_requst_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
+ (u64)rx_qp_err_stats->nak_remote_operation_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
+ (u64)rx_qp_err_stats->nak_count_remote_access_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+ (u64)rx_qp_err_stats->local_length_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+ (u64)rx_qp_err_stats->local_protection_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+ (u64)rx_qp_err_stats->local_qp_operation_errors);
+ return stats;
+}
+
+static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_qp_err_stats *tx_qp_err_stats =
+ &rdma_stats->tx_qp_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+ (u64)tx_qp_err_stats->local_length_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+ (u64)tx_qp_err_stats->local_protection_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+ (u64)tx_qp_err_stats->local_qp_operation_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors",
+ (u64)tx_qp_err_stats->retry_count_exceeded_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors",
+ (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors);
+ return stats;
+}
+
+static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+{
+ int i;
+ char *pstats = dev->stats_mem.debugfs_mem;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_dbg_stats *tx_dbg_stats =
+ &rdma_stats->tx_dbg_stats;
+
+ memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ for (i = 0; i < 100; i++)
+ pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+ tx_dbg_stats->data[i]);
+
+ return dev->stats_mem.debugfs_mem;
+}
+
+static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+{
+ int i;
+ char *pstats = dev->stats_mem.debugfs_mem;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_dbg_stats *rx_dbg_stats =
+ &rdma_stats->rx_dbg_stats;
+
+ memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ for (i = 0; i < 200; i++)
+ pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+ rx_dbg_stats->data[i]);
+
+ return dev->stats_mem.debugfs_mem;
+}
+
+static void ocrdma_update_stats(struct ocrdma_dev *dev)
+{
+ ulong now = jiffies, secs;
+ int status = 0;
+
+ secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U;
+ if (secs) {
+ /* update */
+ status = ocrdma_mbx_rdma_stats(dev, false);
+ if (status)
+ pr_err("%s: stats mbox failed with status = %d\n",
+ __func__, status);
+ dev->last_stats_time = jiffies;
+ }
+}
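The guard above refreshes the mailbox stats at most once per second. A roughly equivalent formulation using the conventional jiffies helpers (a sketch, not the committed code):

    static bool ocrdma_stats_refresh_due(struct ocrdma_dev *dev)
    {
            if (!time_after(jiffies, dev->last_stats_time + HZ))
                    return false;
            dev->last_stats_time = jiffies;
            return true;
    }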
+
+static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct ocrdma_stats *pstats = filp->private_data;
+ struct ocrdma_dev *dev = pstats->dev;
+ ssize_t status = 0;
+ char *data = NULL;
+
+ /* No partial reads */
+ if (*ppos != 0)
+ return 0;
+
+ mutex_lock(&dev->stats_lock);
+
+ ocrdma_update_stats(dev);
+
+ switch (pstats->type) {
+ case OCRDMA_RSRC_STATS:
+ data = ocrdma_resource_stats(dev);
+ break;
+ case OCRDMA_RXSTATS:
+ data = ocrdma_rx_stats(dev);
+ break;
+ case OCRDMA_WQESTATS:
+ data = ocrdma_wqe_stats(dev);
+ break;
+ case OCRDMA_TXSTATS:
+ data = ocrdma_tx_stats(dev);
+ break;
+ case OCRDMA_DB_ERRSTATS:
+ data = ocrdma_db_errstats(dev);
+ break;
+ case OCRDMA_RXQP_ERRSTATS:
+ data = ocrdma_rxqp_errstats(dev);
+ break;
+ case OCRDMA_TXQP_ERRSTATS:
+ data = ocrdma_txqp_errstats(dev);
+ break;
+ case OCRDMA_TX_DBG_STATS:
+ data = ocrdma_tx_dbg_stats(dev);
+ break;
+ case OCRDMA_RX_DBG_STATS:
+ data = ocrdma_rx_dbg_stats(dev);
+ break;
+
+ default:
+ status = -EFAULT;
+ goto exit;
+ }
+
+ if (usr_buf_len < strlen(data)) {
+ status = -ENOSPC;
+ goto exit;
+ }
+
+ status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data,
+ strlen(data));
+exit:
+ mutex_unlock(&dev->stats_lock);
+ return status;
+}
+
+static const struct file_operations ocrdma_dbg_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ocrdma_dbgfs_ops_read,
+};
+
+void ocrdma_add_port_stats(struct ocrdma_dev *dev)
+{
+ if (!ocrdma_dbgfs_dir)
+ return;
+
+ /* Create port stats base dir */
+ dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+ if (!dev->dir)
+ goto err;
+
+ dev->rsrc_stats.type = OCRDMA_RSRC_STATS;
+ dev->rsrc_stats.dev = dev;
+ if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir,
+ &dev->rsrc_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_stats.type = OCRDMA_RXSTATS;
+ dev->rx_stats.dev = dev;
+ if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir,
+ &dev->rx_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->wqe_stats.type = OCRDMA_WQESTATS;
+ dev->wqe_stats.dev = dev;
+ if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir,
+ &dev->wqe_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->tx_stats.type = OCRDMA_TXSTATS;
+ dev->tx_stats.dev = dev;
+ if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir,
+ &dev->tx_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->db_err_stats.type = OCRDMA_DB_ERRSTATS;
+ dev->db_err_stats.dev = dev;
+ if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir,
+ &dev->db_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+
+ dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS;
+ dev->tx_qp_err_stats.dev = dev;
+ if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->tx_qp_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS;
+ dev->rx_qp_err_stats.dev = dev;
+ if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->rx_qp_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+
+ dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS;
+ dev->tx_dbg_stats.dev = dev;
+ if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->tx_dbg_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS;
+ dev->rx_dbg_stats.dev = dev;
+ if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->rx_dbg_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ /* Now create dma_mem for stats mbx command */
+ if (!ocrdma_alloc_stats_mem(dev))
+ goto err;
+
+ mutex_init(&dev->stats_lock);
+
+ return;
+err:
+ ocrdma_release_stats_mem(dev);
+ debugfs_remove_recursive(dev->dir);
+ dev->dir = NULL;
+}
+
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
+{
+ if (!dev->dir)
+ return;
+ mutex_destroy(&dev->stats_lock);
+ ocrdma_release_stats_mem(dev);
+ debugfs_remove(dev->dir);
+}
+
+void ocrdma_init_debugfs(void)
+{
+ /* Create base dir in debugfs root dir */
+ ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL);
+}
+
+void ocrdma_rem_debugfs(void)
+{
+ debugfs_remove_recursive(ocrdma_dbgfs_dir);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
new file mode 100644
index 00000000000..5f5e20c46d7
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -0,0 +1,54 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_STATS_H__
+#define __OCRDMA_STATS_H__
+
+#include <linux/debugfs.h>
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+
+#define OCRDMA_MAX_DBGFS_MEM 4096
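The formatters in ocrdma_stats.c combine _lo/_hi counter halves through a convert_to_64bit() helper that does not appear in these hunks; a definition consistent with those call sites would be:

    /* assumed helper: fold two 32-bit HW counters into one u64 */
    #define convert_to_64bit(lo, hi) (((u64)(hi) << 32) | (u64)(lo))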
+
+enum OCRDMA_STATS_TYPE {
+ OCRDMA_RSRC_STATS,
+ OCRDMA_RXSTATS,
+ OCRDMA_WQESTATS,
+ OCRDMA_TXSTATS,
+ OCRDMA_DB_ERRSTATS,
+ OCRDMA_RXQP_ERRSTATS,
+ OCRDMA_TXQP_ERRSTATS,
+ OCRDMA_TX_DBG_STATS,
+ OCRDMA_RX_DBG_STATS
+};
+
+void ocrdma_rem_debugfs(void);
+void ocrdma_init_debugfs(void);
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
+void ocrdma_add_port_stats(struct ocrdma_dev *dev);
+
+#endif /* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 6e982bb43c3..edf6211d84b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
dev = get_ocrdma_dev(ibdev);
memset(sgid, 0, sizeof(*sgid));
- if (index >= OCRDMA_MAX_SGID)
+ if (index > OCRDMA_MAX_SGID)
return -EINVAL;
memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
@@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
attr->max_mr = dev->attr.max_mr;
- attr->max_mw = 0;
+ attr->max_mw = dev->attr.max_mw;
attr->max_pd = dev->attr.max_pd;
attr->atomic_cap = 0;
attr->max_fmr = 0;
@@ -141,10 +141,9 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
/* Unsupported */
*ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
- };
+ }
}
-
int ocrdma_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
@@ -176,7 +175,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
+ IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = OCRDMA_MAX_SGID;
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
@@ -267,7 +266,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
if (udata && uctx) {
pd->dpp_enabled =
- dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+ ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
pd->num_dpp_qp =
pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
}
@@ -726,10 +725,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
u32 num_pbes)
{
struct ocrdma_pbe *pbe;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
struct ib_umem *umem = mr->umem;
- int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
if (!mr->hwmr.num_pbes)
return;
@@ -739,39 +738,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
shift = ilog2(umem->page_size);
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- /* get all the dma regions from the chunk. */
- for (i = 0; i < chunk->nmap; i++) {
- pages = sg_dma_len(&chunk->page_list[i]) >> shift;
- for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (&chunk->page_list[i]) +
- (umem->page_size * pg_cnt));
- pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (&chunk->page_list[i]) +
- umem->page_size * pg_cnt)));
- pbe_cnt += 1;
- total_num_pbes += 1;
- pbe++;
-
- /* if done building pbes, issue the mbx cmd. */
- if (total_num_pbes == num_pbes)
- return;
-
- /* if the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt ==
- (mr->hwmr.pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- pbe_cnt = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> shift;
+ for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+ /* store the page address in pbe */
+ pbe->pa_lo =
+ cpu_to_le32(sg_dma_address
+ (sg) +
+ (umem->page_size * pg_cnt));
+ pbe->pa_hi =
+ cpu_to_le32(upper_32_bits
+ ((sg_dma_address
+ (sg) +
+ umem->page_size * pg_cnt)));
+ pbe_cnt += 1;
+ total_num_pbes += 1;
+ pbe++;
+
+ /* if done building pbes, issue the mbx cmd. */
+ if (total_num_pbes == num_pbes)
+ return;
+
+ /* if the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt ==
+ (mr->hwmr.pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ pbe_cnt = 0;
}
+
}
}
}
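A worked example of the lo/hi split in the loop above (illustrative DMA address, page offset 0):

    /* sg_dma_address(sg) = 0x0000000123456000:
     *   pbe->pa_lo = cpu_to_le32(0x23456000)  (low 32 bits)
     *   pbe->pa_hi = cpu_to_le32(0x00000001)  (upper_32_bits())
     */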
@@ -840,8 +837,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
- if (mr->hwmr.fr_mr == 0)
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
if (mr->umem)
@@ -910,6 +906,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
spin_lock_init(&cq->comp_handler_lock);
INIT_LIST_HEAD(&cq->sq_head);
INIT_LIST_HEAD(&cq->rq_head);
+ cq->first_arm = true;
if (ib_ctx) {
uctx = get_ocrdma_ucontext(ib_ctx);
@@ -927,9 +924,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
goto ctx_err;
}
cq->phase = OCRDMA_CQE_VALID;
- cq->arm_needed = true;
dev->cq_tbl[cq->id] = cq;
-
return &cq->ibcq;
ctx_err:
@@ -952,15 +947,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
return status;
}
+static void ocrdma_flush_cq(struct ocrdma_cq *cq)
+{
+ int cqe_cnt;
+ int valid_count = 0;
+ unsigned long flags;
+
+ struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+ struct ocrdma_cqe *cqe = NULL;
+
+ cqe = cq->va;
+ cqe_cnt = cq->cqe_cnt;
+
+ /* Last irq might have scheduled a polling thread;
+ * sync up with it before hard flushing.
+ */
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while (cqe_cnt) {
+ if (is_cqe_valid(cq, cqe))
+ valid_count++;
+ cqe++;
+ cqe_cnt--;
+ }
+ ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+}
+
int ocrdma_destroy_cq(struct ib_cq *ibcq)
{
int status;
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_eq *eq = NULL;
struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
int pdid = 0;
+ u32 irq, indx;
- status = ocrdma_mbx_destroy_cq(dev, cq);
+ dev->cq_tbl[cq->id] = NULL;
+ indx = ocrdma_get_eq_table_index(dev, cq->eqn);
+ if (indx == -EINVAL)
+ BUG();
+ eq = &dev->eq_tbl[indx];
+ irq = ocrdma_get_irq(dev, eq);
+ synchronize_irq(irq);
+ ocrdma_flush_cq(cq);
+
+ status = ocrdma_mbx_destroy_cq(dev, cq);
if (cq->ucontext) {
pdid = cq->ucontext->cntxt_pd->id;
ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -969,7 +1001,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
- dev->cq_tbl[cq->id] = NULL;
kfree(cq);
return status;
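The destroy path above now follows a strict order:

    /* 1. dev->cq_tbl[cq->id] = NULL  -- unpublish from ISR lookup
     * 2. synchronize_irq(irq)        -- wait out a handler in flight
     * 3. ocrdma_flush_cq(cq)         -- return valid CQEs to HW
     * 4. ocrdma_mbx_destroy_cq()     -- only then destroy the CQ
     */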
@@ -1092,15 +1123,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
}
uresp.db_page_addr = usr_db;
uresp.db_page_size = dev->nic_info.db_page_size;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
- uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
- uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
- uresp.db_shift = 24;
- } else {
- uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
- uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
- uresp.db_shift = 16;
- }
+ uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+ uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
if (qp->dpp_enabled) {
uresp.dpp_credit = dpp_credit_lmt;
@@ -1132,7 +1157,7 @@ err:
static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
struct ocrdma_pd *pd)
{
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
qp->sq_db = dev->nic_info.db +
(pd->id * dev->nic_info.db_page_size) +
OCRDMA_DB_GEN2_SQ_OFFSET;
@@ -1182,7 +1207,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
}
-
static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -1268,17 +1292,6 @@ gen_err:
return ERR_PTR(status);
}
-
-static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
-{
- if (qp->db_cache) {
- u32 val = qp->rq.dbid | (qp->db_cache <<
- ocrdma_get_num_posted_shift(qp));
- iowrite32(val, qp->rq_db);
- qp->db_cache = 0;
- }
-}
-
int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask)
{
@@ -1296,9 +1309,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
*/
if (status < 0)
return status;
- status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
- if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
- ocrdma_flush_rq_db(qp);
+ status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
return status;
}
@@ -1326,7 +1337,8 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
@@ -1415,7 +1427,7 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
- OCRDMA_QP_PARAMS_SQ_PSN_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
OCRDMA_QP_PARAMS_TCLASS_SHIFT;
qp_attr->ah_attr.ah_flags = IB_AH_GRH;
@@ -1509,7 +1521,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
int discard_cnt = 0;
u32 cur_getp, stop_getp;
struct ocrdma_cqe *cqe;
- u32 qpn = 0;
+ u32 qpn = 0, wqe_idx = 0;
spin_lock_irqsave(&cq->cq_lock, cq_flags);
@@ -1538,24 +1550,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
if (qpn == 0 || qpn != qp->id)
goto skip_cqe;
- /* mark cqe discarded so that it is not picked up later
- * in the poll_cq().
- */
- discard_cnt += 1;
- cqe->cmn.qpn = 0;
if (is_cqe_for_sq(cqe)) {
ocrdma_hwq_inc_tail(&qp->sq);
} else {
if (qp->srq) {
+ wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+ OCRDMA_CQE_BUFTAG_SHIFT) &
+ qp->srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
spin_lock_irqsave(&qp->srq->q_lock, flags);
ocrdma_hwq_inc_tail(&qp->srq->rq);
- ocrdma_srq_toggle_bit(qp->srq, cur_getp);
+ ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
spin_unlock_irqrestore(&qp->srq->q_lock, flags);
} else {
ocrdma_hwq_inc_tail(&qp->rq);
}
}
+ /* mark cqe discarded so that it is not picked up later
+ * in the poll_cq().
+ */
+ discard_cnt += 1;
+ cqe->cmn.qpn = 0;
skip_cqe:
cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
} while (cur_getp != stop_getp);
@@ -1658,7 +1675,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
(srq->pd->id * dev->nic_info.db_page_size);
uresp.db_page_size = dev->nic_info.db_page_size;
uresp.num_rqe_allocated = srq->rq.max_cnt;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
uresp.db_shift = 24;
} else {
@@ -1981,9 +1998,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
- if ((wr->wr.fast_reg.page_list_len >
- qp->dev->attr.max_pages_per_frmr) ||
- (wr->wr.fast_reg.length > 0xffffffffULL))
+ if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
return -EINVAL;
hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2010,15 +2025,15 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
fast_reg->size_sge =
get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
- mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
- (OCRDMA_MAX_STAG - 1)];
+ mr = (struct ocrdma_mr *) (unsigned long)
+ qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
return 0;
}
static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
{
- u32 val = qp->sq.dbid | (1 << 16);
+ u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
iowrite32(val, qp->sq_db);
}
@@ -2123,12 +2138,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
{
- u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
+ u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
- if (qp->state != OCRDMA_QPS_INIT)
- iowrite32(val, qp->rq_db);
- else
- qp->db_cache++;
+ iowrite32(val, qp->rq_db);
}
static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -2214,7 +2226,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
if (row == srq->bit_fields_len)
BUG();
- return indx;
+ return indx + 1; /* tags are used from index 1; 0 is invalid */
}
static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
@@ -2331,7 +2343,7 @@ static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
default:
ibwc_status = IB_WC_GENERAL_ERR;
break;
- };
+ }
return ibwc_status;
}
@@ -2370,7 +2382,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
pr_err("%s() invalid opcode received = 0x%x\n",
__func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
break;
- };
+ }
}
static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
@@ -2551,10 +2563,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
srq = get_ocrdma_srq(qp->ibqp.srq);
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
- OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+ OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
+
ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
spin_lock_irqsave(&srq->q_lock, flags);
- ocrdma_srq_toggle_bit(srq, wqe_idx);
+ ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
spin_unlock_irqrestore(&srq->q_lock, flags);
ocrdma_hwq_inc_tail(&srq->rq);
}
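A minimal sketch of the 1-based SRQ tag convention these hunks establish (OCRDMA_CQE_BUFTAG_SHIFT and max_wqe_idx are the driver's own definitions; the helper names are illustrative): index 0 is never handed out, so a decoded tag of 0 can only mean a corrupt CQE, while the allocation bitmap stays 0-based via wqe_idx - 1.

/* Sketch, not driver code: encode on post, decode on completion. */
static u32 srq_tag_from_bitmap_idx(u32 bitmap_idx)
{
        return bitmap_idx + 1;          /* tags are 1-based; 0 == invalid */
}

static u32 srq_bitmap_idx_from_cqe(u32 buftag_qpn, u32 max_wqe_idx)
{
        u32 wqe_idx = (buftag_qpn >> OCRDMA_CQE_BUFTAG_SHIFT) & max_wqe_idx;

        BUG_ON(wqe_idx < 1);            /* tag 0 can only be corruption */
        return wqe_idx - 1;             /* back to the 0-based bitmap slot */
}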
@@ -2706,10 +2721,18 @@ expand_cqe:
}
stop_cqe:
cq->getp = cur_getp;
- if (polled_hw_cqes || expand || stop) {
- ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
+ if (cq->deferred_arm) {
+ ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
polled_hw_cqes);
+ cq->deferred_arm = false;
+ cq->deferred_sol = false;
+ } else {
+ /* We need to pop the CQE. No need to arm */
+ ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
+ polled_hw_cqes);
+ cq->deferred_sol = false;
}
+
return i;
}
@@ -2781,30 +2804,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
u16 cq_id;
- u16 cur_getp;
- struct ocrdma_cqe *cqe;
unsigned long flags;
+ bool arm_needed = false, sol_needed = false;
cq_id = cq->id;
spin_lock_irqsave(&cq->cq_lock, flags);
if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
- cq->armed = true;
+ arm_needed = true;
if (cq_flags & IB_CQ_SOLICITED)
- cq->solicited = true;
-
- cur_getp = cq->getp;
- cqe = cq->va + cur_getp;
+ sol_needed = true;
- /* check whether any valid cqe exist or not, if not then safe to
- * arm. If cqe is not yet consumed, then let it get consumed and then
- * we arm it to avoid false interrupts.
- */
- if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
- cq->arm_needed = false;
- ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
+ if (cq->first_arm) {
+ ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
+ cq->first_arm = false;
+ goto skip_defer;
}
+ cq->deferred_arm = true;
+
+skip_defer:
+ cq->deferred_sol = sol_needed;
spin_unlock_irqrestore(&cq->cq_lock, flags);
+
return 0;
}
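The poll and arm hunks above replace the old is_cqe_valid() probe with a deferred-arm handshake. A compressed sketch of the protocol (cq field names as in the hunks; ring_cq_db() stands in for ocrdma_ring_cq_db() with the dev/id arguments dropped):

/* Sketch: req_notify defers the arm, the next poll_cq() performs it. */
static void arm_cq_sketch(struct ocrdma_cq *cq, bool arm, bool sol)
{
        if (cq->first_arm) {            /* nothing polled yet: arm right away */
                ring_cq_db(cq, arm, sol, 0);
                cq->first_arm = false;
        } else {
                cq->deferred_arm = true;        /* poll_cq() will arm for us */
        }
        cq->deferred_sol = sol;
}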
@@ -2839,7 +2860,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
goto mbx_err;
mr->ibmr.rkey = mr->hwmr.lkey;
mr->ibmr.lkey = mr->hwmr.lkey;
- dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+ dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
+ (unsigned long) mr;
return &mr->ibmr;
mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 1946101419a..c00ae093b6f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -868,8 +868,10 @@ struct qib_devdata {
/* last buffer for user use */
u32 lastctxt_piobuf;
- /* saturating counter of (non-port-specific) device interrupts */
- u32 int_counter;
+ /* reset value */
+ u64 z_int_counter;
+ /* percpu intcounter */
+ u64 __percpu *int_counter;
/* pio bufs allocated per ctxt */
u32 pbufsctxt;
@@ -1184,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *);
void qib_set_ctxtcnt(struct qib_devdata *);
int qib_create_ctxts(struct qib_devdata *dd);
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *);
void qib_nomsix(struct qib_devdata *);
void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
/*
* dma_addr wrappers - all 0's invalid for hw
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 1686fd4bda8..5dfda4c5cc9 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct qib_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct qib_devdata *dd;
@@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
ppd = &dd->pport[dp.port - 1];
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
- clen = dp.len >> 2;
-
- if ((plen + 4) > ppd->ibmaxlen) {
+ /*
+ * Need the total length before the first word is written, plus
+ * 2 dwords. One dword is padding so we get the full user data
+ * when not aligned on a word boundary; the other reserves room
+ * for the ICRC, which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
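In bytes, the check above reserves one dword of qword padding and one dword for the ICRC. A hedged restatement of the arithmetic (diagpkt_len_ok() is illustrative, not a driver function):

/* Sketch of the reserve arithmetic. */
static bool diagpkt_len_ok(u32 len, u32 ibmaxlen)
{
        u32 maxlen_reserve = 2 * sizeof(u32);   /* pad dword + ICRC dword */

        return len <= ibmaxlen - maxlen_reserve;
}
/* e.g. ibmaxlen == 4096 admits payloads up to 4088 bytes */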
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
qib_devinfo(dd->pcidev,
@@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
*/
if (dd->flags & QIB_PIO_FLUSH_WC) {
qib_flush_wc();
- qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
qib_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- qib_pio_copy(piobuf + 2, tmpbuf, clen);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen);
if (dd->flags & QIB_USE_SPCL_TRIG) {
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
const struct diag_observer *op)
{
struct diag_observer_list_elt *olp;
- int ret = -EINVAL;
+ unsigned long flags;
if (!dd || !op)
- goto bail;
- ret = -ENOMEM;
+ return -EINVAL;
olp = vmalloc(sizeof *olp);
if (!olp) {
pr_err("vmalloc for observer failed\n");
- goto bail;
+ return -ENOMEM;
}
- if (olp) {
- unsigned long flags;
- spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
- olp->op = op;
- olp->next = dd->diag_observer_list;
- dd->diag_observer_list = olp;
- spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
- ret = 0;
- }
-bail:
- return ret;
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp->op = op;
+ olp->next = dd->diag_observer_list;
+ dd->diag_observer_list = olp;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+ return 0;
}
/* Remove all registered observers when device is closed */
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
index 2920bb39a65..59fe092b4b0 100644
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ b/drivers/infiniband/hw/qib/qib_dma.c
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
.unmap_page = qib_dma_unmap_page,
.map_sg = qib_map_sg,
.unmap_sg = qib_unmap_sg,
- .dma_address = qib_sg_dma_address,
- .dma_len = qib_sg_dma_len,
.sync_single_for_cpu = qib_sync_single_for_cpu,
.sync_single_for_device = qib_sync_single_for_device,
.alloc_coherent = qib_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 275f247f9fc..b15e34eeef6 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
cused++;
else
cfree++;
- if (pusable && cfree && cused < inuse) {
+ if (cfree && cused < inuse) {
udd = dd;
inuse = cused;
}
@@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
struct qib_ctxtdata *rcd = fd->rcd;
struct qib_devdata *dd = rcd->dd;
- if (dd->flags & QIB_HAS_SEND_DMA)
+ if (dd->flags & QIB_HAS_SEND_DMA) {
fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
dd->unit,
@@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
fd->subctxt);
if (!fd->pq)
return -ENOMEM;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f247fc6e618..cab610ccd50 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode,
static ssize_t driver_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ qib_stats.sps_ints = qib_sps_ints();
return simple_read_from_buffer(buf, count, ppos, &qib_stats,
sizeof qib_stats);
}
@@ -456,13 +457,13 @@ static int remove_file(struct dentry *parent, char *name)
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
simple_unlink(parent->d_inode, tmp);
} else {
spin_unlock(&tmp->d_lock);
}
+ dput(tmp);
ret = 0;
bail:
@@ -491,6 +492,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
+ mutex_lock(&dir->d_inode->i_mutex);
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -505,8 +507,10 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- d_delete(dir);
+ mutex_unlock(&dir->d_inode->i_mutex);
ret = simple_rmdir(root->d_inode, dir);
+ d_delete(dir);
+ dput(dir);
bail:
mutex_unlock(&root->d_inode->i_mutex);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 84e593d6007..d68266ac761 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler re-ordering around actual reset */
@@ -3266,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR;
ppd->link_width_enabled = IB_WIDTH_4X;
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 454c2e7668f..7dec89fdc12 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
-
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
unlikely_7220_intr(dd, istat);
@@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler reordering around actual reset */
@@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
init_waitqueue_head(&cpspec->autoneg_wait);
INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf..a7eb32517a0 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2395,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* ensure previous Tx parameters are not still forced */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
if (qib_compat_ddr_negotiate) {
ppd->cpspec->ibdeltainprog = 1;
ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
@@ -3110,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* handle "errors" of various kinds first, device ahead of port */
if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -3181,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
@@ -3210,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -3243,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3272,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3301,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3331,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3718,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
dd->pport->cpspec->ibsymdelta = 0;
dd->pport->cpspec->iblnkerrdelta = 0;
dd->pport->cpspec->ibmalfdelta = 0;
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
/*
* Keep chip from being accessed until we are ready. Use
@@ -6190,21 +6182,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
- int ret;
-
+ char *n;
if (strlen(str) >= MAX_ATTEN_LEN) {
pr_info("txselect_values string too long\n");
return -ENOSPC;
}
- ret = kstrtoul(str, 0, &val);
- if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ val = simple_strtoul(str, &n, 0);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
pr_info("txselect_values must start with a number < %d\n",
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
- return ret ? ret : -EINVAL;
+ return -EINVAL;
}
-
strcpy(txselect_list, str);
+
list_for_each_entry(dd, &qib_dev_list, list)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
set_no_qsfp_atten(dd, 1);
@@ -6553,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
}
dd->num_pports++;
- qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ if (ret) {
+ dd->num_pports--;
+ goto bail;
+ }
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_width_enabled = IB_WIDTH_4X;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 24e802f4ea2..8d3c78ddc90 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
int qib_create_ctxts(struct qib_devdata *dd)
{
unsigned i;
- int ret;
int local_node_id = pcibus_to_node(dd->pcidev->bus);
if (local_node_id < 0)
@@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd)
if (!dd->rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata array, failing\n");
- ret = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* create (one or more) kctxt */
@@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd)
if (!rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
- ret = -ENOMEM;
- goto done;
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+ return -ENOMEM;
}
rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
rcd->seq_cnt = 1;
}
- ret = 0;
-done:
- return ret;
+ return 0;
}
/*
@@ -233,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
/*
* Common code for initializing the physical port structure.
*/
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
int size;
@@ -243,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->sdma_lock);
spin_lock_init(&ppd->lflags_lock);
+ spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
init_timer(&ppd->symerr_clear_timer);
@@ -250,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
ppd->symerr_clear_timer.data = (unsigned long)ppd;
ppd->qib_wq = NULL;
-
- spin_lock_init(&ppd->cc_shadow_lock);
+ ppd->ibport_data.pmastats =
+ alloc_percpu(struct qib_pma_counters);
+ if (!ppd->ibport_data.pmastats)
+ return -ENOMEM;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
goto bail;
@@ -299,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
goto bail_3;
}
- return;
+ return 0;
bail_3:
kfree(ppd->ccti_entries_shadow);
@@ -313,7 +313,7 @@ bail_1:
bail:
/* User is intentionally disabling the congestion control agent */
if (!qib_cc_table_size)
- return;
+ return 0;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
qib_cc_table_size = 0;
@@ -324,7 +324,7 @@ bail:
qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
port);
- return;
+ return 0;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
static void verify_interrupt(unsigned long opaque)
{
struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ u64 int_counter;
if (!dd)
return; /* being torn down */
@@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque)
* If we don't have a lid or any interrupts, let the user know and
* don't bother checking again.
*/
- if (dd->int_counter == 0) {
+ int_counter = qib_int_counter(dd) - dd->z_int_counter;
+ if (int_counter == 0) {
if (!dd->f_intr_fallback(dd))
dev_err(&dd->pcidev->dev,
"No interrupts detected, not usable.\n");
@@ -633,6 +635,12 @@ wq_error:
return -ENOMEM;
}
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+ free_percpu(ppd->ibport_data.pmastats);
+ ppd->ibport_data.pmastats = NULL;
+}
+
/**
* qib_init - do the actual initialization sequence on the chip
* @dd: the qlogic_ib device
@@ -920,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd)
destroy_workqueue(ppd->qib_wq);
ppd->qib_wq = NULL;
}
+ qib_free_pportdata(ppd);
}
qib_update_eeprom_log(dd);
@@ -1079,9 +1088,34 @@ void qib_free_devdata(struct qib_devdata *dd)
#ifdef CONFIG_DEBUG_FS
qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
+ free_percpu(dd->int_counter);
ib_dealloc_device(&dd->verbs_dev.ibdev);
}
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+ int cpu;
+ u64 int_counter = 0;
+
+ for_each_possible_cpu(cpu)
+ int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+ return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ sps_ints += qib_int_counter(dd);
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return sps_ints;
+}
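The reset hunks in the chip files pair with these helpers: the per-cpu slots are never cleared. Instead z_int_counter records the running total at reset time and callers subtract it, as verify_interrupt() does in the hunk above. A minimal sketch of the pattern (field names as above; reset_int_counter() is illustrative):

/* Sketch: "reset" a percpu counter by re-baselining, not zeroing. */
static void reset_int_counter(struct qib_devdata *dd)
{
        dd->z_int_counter = qib_int_counter(dd);        /* new baseline */
}

static u64 ints_since_reset(struct qib_devdata *dd)
{
        return qib_int_counter(dd) - dd->z_int_counter;
}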
+
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
@@ -1097,14 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
int ret;
dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
+ if (!dd)
+ return ERR_PTR(-ENOMEM);
-#ifdef CONFIG_DEBUG_FS
- qib_dbg_ibdev_init(&dd->verbs_dev);
-#endif
+ INIT_LIST_HEAD(&dd->list);
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&qib_devs_lock, flags);
@@ -1121,11 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (ret < 0) {
qib_early_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
-#ifdef CONFIG_DEBUG_FS
- qib_dbg_ibdev_exit(&dd->verbs_dev);
-#endif
- ib_dealloc_device(&dd->verbs_dev.ibdev);
- dd = ERR_PTR(ret);
+ goto bail;
+ }
+ dd->int_counter = alloc_percpu(u64);
+ if (!dd->int_counter) {
+ ret = -ENOMEM;
+ qib_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
goto bail;
}
@@ -1139,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
qib_early_err(&pdev->dev,
"Could not alloc cpulist info, cpu affinity might be wrong\n");
}
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
return dd;
+bail:
+ if (!list_empty(&dd->list))
+ list_del_init(&dd->list);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+ return ERR_PTR(ret);
}
/*
@@ -1234,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
* Do all the generic driver unit- and chip-independent memory
* allocation and initialization.
*/
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
{
int ret;
@@ -1278,12 +1316,12 @@ bail:
return ret;
}
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
/*
* Do the non-unit driver cleanup, memory free, etc. at unload.
*/
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
{
int ret;
@@ -1308,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
qib_dev_cleanup();
}
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ccb119143d2..22c720e5740 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
event.event = IB_EVENT_PKEY_CHANGE;
event.device = &dd->verbs_dev.ibdev;
- event.element.port_num = 1;
+ event.element.port_num = port;
ib_dispatch_event(&event);
}
return 0;
@@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
return reply((struct ib_smp *)pmp);
}
+static void qib_snapshot_pmacounters(
+ struct qib_ibport *ibp,
+ struct qib_pma_counters *pmacounters)
+{
+ struct qib_pma_counters *p;
+ int cpu;
+
+ memset(pmacounters, 0, sizeof(*pmacounters));
+ for_each_possible_cpu(cpu) {
+ p = per_cpu_ptr(ibp->pmastats, cpu);
+ pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+ pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+ pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+ pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+ }
+}
+
static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
@@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
u8 port_select = p->port_select;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
p->port_rcv_data = cpu_to_be64(rwords);
p->port_xmit_packets = cpu_to_be64(spkts);
p->port_rcv_packets = cpu_to_be64(rpkts);
- p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
- p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
- p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
- p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+ - ibp->z_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+ - ibp->z_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+ - ibp->z_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+ - ibp->z_multicast_rcv);
bail:
return reply((struct ib_smp *) pmp);
@@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
@@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
ibp->z_port_rcv_packets = rpkts;
+ qib_snapshot_pmacounters(ibp, &pma);
+
if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
- ibp->n_unicast_xmit = 0;
+ ibp->z_unicast_xmit = pma.n_unicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
- ibp->n_unicast_rcv = 0;
+ ibp->z_unicast_rcv = pma.n_unicast_rcv;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
- ibp->n_multicast_xmit = 0;
+ ibp->z_multicast_xmit = pma.n_multicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
- ibp->n_multicast_rcv = 0;
+ ibp->z_multicast_rcv = pma.n_multicast_rcv;
return pma_get_portcounters_ext(pmp, ibdev, port);
}
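The PMA counters use the same baseline trick: a Set request no longer zeroes anything, it just lifts the z_* baseline to the current per-cpu sum. Sketch with one counter (the helper names are illustrative; the other three counters follow the same shape):

/* Sketch: PMA read and "clear" against a percpu running total. */
static u64 pma_unicast_xmit(struct qib_ibport *ibp)
{
        struct qib_pma_counters pma;

        qib_snapshot_pmacounters(ibp, &pma);
        return pma.n_unicast_xmit - ibp->z_unicast_xmit;
}

static void pma_clear_unicast_xmit(struct qib_ibport *ibp)
{
        struct qib_pma_counters pma;

        qib_snapshot_pmacounters(ibp, &pma);
        ibp->z_unicast_xmit = pma.n_unicast_xmit;       /* baseline, not zero */
}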
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f..941d4d50d8e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_notice_attr {
u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
__be16 reserved;
__be16 lid; /* where violation happened */
u8 port_num; /* where violation happened */
- } __attribute__ ((packed)) ntc_129_131;
+ } __packed ntc_129_131;
struct {
__be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
__be32 new_cap_mask; /* new capability mask */
u8 reserved3;
u8 change_flags; /* low 3 bits only */
- } __attribute__ ((packed)) ntc_144;
+ } __packed ntc_144;
struct {
__be16 reserved;
__be16 lid; /* lid where sys guid changed */
__be16 reserved2;
__be64 new_sys_guid;
- } __attribute__ ((packed)) ntc_145;
+ } __packed ntc_145;
struct {
__be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
u8 reserved3;
u8 dr_trunc_hop;
u8 dr_rtn_path[30];
- } __attribute__ ((packed)) ntc_256;
+ } __packed ntc_256;
struct {
__be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
__be32 qp2; /* high 8 bits reserved */
union ib_gid gid1;
union ib_gid gid2;
- } __attribute__ ((packed)) ntc_257_258;
+ } __packed ntc_257_258;
} details;
};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index e6687ded821..9bbb55347cc 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct qib_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ struct scatterlist *sg;
+ int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
@@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
+ n = umem->nmap;
mr = alloc_mr(n, pd);
if (IS_ERR(mr)) {
@@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
- vaddr = page_address(sg_page(&chunk->page_list[i]));
+ vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
@@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m++;
n = 0;
}
- }
}
ret = &mr->ibmr;
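The umem chunk list is gone here in favor of a single scatterlist. A reduced sketch of the new walk (umem->nmap and sg_head come from ib_umem_get(); walk_umem_pages() is illustrative):

/* Sketch: visit every pinned umem page via its scatterlist. */
static int walk_umem_pages(struct ib_umem *umem)
{
        struct scatterlist *sg;
        int entry;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                void *vaddr = page_address(sg_page(sg));

                if (!vaddr)
                        return -EINVAL; /* highmem page, not kernel-mapped */
                /* hand vaddr to the MR page tables here */
        }
        return 0;
}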
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 3f14009fb66..61a0046efb7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -51,8 +51,8 @@
* file calls, even though this violates some
* expectations of harmlessness.
*/
-static int qib_tune_pcie_caps(struct qib_devdata *);
-static int qib_tune_pcie_coalesce(struct qib_devdata *);
+static void qib_tune_pcie_caps(struct qib_devdata *);
+static void qib_tune_pcie_coalesce(struct qib_devdata *);
/*
* Do all the common PCIe setup and initialization.
@@ -197,46 +197,47 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
struct qib_msix_entry *qib_msix_entry)
{
int ret;
- u32 tabsize = 0;
- u16 msix_flags;
+ int nvec = *msixcnt;
struct msix_entry *msix_entry;
int i;
+ ret = pci_msix_vec_count(dd->pcidev);
+ if (ret < 0)
+ goto do_intx;
+
+ nvec = min(nvec, ret);
+
/* We can't pass qib_msix_entry array to qib_msix_setup
* so use a dummy msix_entry array and copy the allocated
* irq back to the qib_msix_entry array. */
- msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
+ msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL);
+ if (!msix_entry)
goto do_intx;
- }
- for (i = 0; i < *msixcnt; i++)
+
+ for (i = 0; i < nvec; i++)
msix_entry[i] = qib_msix_entry[i].msix;
- pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
- tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
- if (tabsize > *msixcnt)
- tabsize = *msixcnt;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- if (ret > 0) {
- tabsize = ret;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- }
-do_intx:
- if (ret) {
- qib_dev_err(dd,
- "pci_enable_msix %d vectors failed: %d, falling back to INTx\n",
- tabsize, ret);
- tabsize = 0;
- }
- for (i = 0; i < tabsize; i++)
+ ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
+ if (ret < 0)
+ goto free_msix_entry;
+ else
+ nvec = ret;
+
+ for (i = 0; i < nvec; i++)
qib_msix_entry[i].msix = msix_entry[i];
+
kfree(msix_entry);
- *msixcnt = tabsize;
+ *msixcnt = nvec;
+ return;
- if (ret)
- qib_enable_intx(dd->pcidev);
+free_msix_entry:
+ kfree(msix_entry);
+do_intx:
+ qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, "
+ "falling back to INTx\n", nvec, ret);
+ *msixcnt = 0;
+ qib_enable_intx(dd->pcidev);
}
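pci_enable_msix_range() collapses the old read-QSIZE-then-retry dance: it allocates anywhere between minvec and maxvec vectors or fails outright. A hedged sketch of the calling pattern (the driver falls back via its own qib_enable_intx(); pci_intx() here is a generic stand-in):

/* Sketch: request 1..nvec MSI-X vectors, fall back to INTx on failure. */
static int enable_msix_or_intx(struct pci_dev *pdev,
                               struct msix_entry *entries, int nvec)
{
        int got = pci_enable_msix_range(pdev, entries, 1, nvec);

        if (got < 0) {
                pci_intx(pdev, 1);      /* 0 vectors == INTx mode */
                return 0;
        }
        return got;                     /* anywhere in [1, nvec] */
}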
/**
@@ -476,30 +477,6 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
"pci_enable_device failed after reset: %d\n", r);
}
-/* code to adjust PCIe capabilities. */
-
-static int fld2val(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- wd &= mask;
- lsbmask = mask ^ (mask & (mask - 1));
- wd /= lsbmask;
- return wd;
-}
-
-static int val2fld(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- lsbmask = mask ^ (mask & (mask - 1));
- wd *= lsbmask;
- return wd;
-}
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
* of these chipsets, with some BIOS settings, and enabling it on those
* systems may result in the system crashing, and/or data corruption.
*/
-static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
+static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
{
int r;
struct pci_dev *parent;
@@ -519,18 +496,18 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
u32 mask, bits, val;
if (!qib_pcie_coalesce)
- return 0;
+ return;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
if (parent->bus->parent) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- return 1;
+ return;
}
if (!pci_is_pcie(parent))
- return 1;
+ return;
if (parent->vendor != 0x8086)
- return 1;
+ return;
/*
* - bit 12: Max_rdcmp_Imt_EN: need to set to 1
@@ -563,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
mask = (3U << 24) | (7U << 10);
} else {
/* not one of the chipsets that we know about */
- return 1;
+ return;
}
pci_read_config_dword(parent, 0x48, &val);
val &= ~mask;
val |= bits;
r = pci_write_config_dword(parent, 0x48, val);
- return 0;
}
/*
@@ -580,55 +556,44 @@ static int qib_pcie_caps;
module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-static int qib_tune_pcie_caps(struct qib_devdata *dd)
+static void qib_tune_pcie_caps(struct qib_devdata *dd)
{
- int ret = 1; /* Assume the worst */
struct pci_dev *parent;
- u16 pcaps, pctl, ecaps, ectl;
- int rc_sup, ep_sup;
- int rc_cur, ep_cur;
+ u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
+ u16 rc_mrrs, ep_mrrs, max_mrrs;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
- if (parent->bus->parent) {
+ if (!pci_is_root_bus(parent->bus)) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- goto bail;
+ return;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
- goto bail;
- pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps);
- pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl);
+ return;
+
+ rc_mpss = parent->pcie_mpss;
+ rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ ep_mpss = dd->pcidev->pcie_mpss;
+ ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
- ret = 0;
/* Find max payload supported by root, endpoint */
- rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
- ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD);
- if (rc_sup > ep_sup)
- rc_sup = ep_sup;
-
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
+ if (rc_mpss > ep_mpss)
+ rc_mpss = ep_mpss;
/* If Supported greater than limit in module param, limit it */
- if (rc_sup > (qib_pcie_caps & 7))
- rc_sup = qib_pcie_caps & 7;
+ if (rc_mpss > (qib_pcie_caps & 7))
+ rc_mpss = qib_pcie_caps & 7;
/* If less than (allowed, supported), bump root payload */
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ if (rc_mpss > rc_mps) {
+ rc_mps = rc_mpss;
+ pcie_set_mps(parent, 128 << rc_mps);
}
/* If less than (allowed, supported), bump endpoint payload */
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (rc_mpss > ep_mps) {
+ ep_mps = rc_mpss;
+ pcie_set_mps(dd->pcidev, 128 << ep_mps);
}
/*
@@ -636,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
* No field for max supported, but PCIe spec limits it to 4096,
* which is code '5' (log2(4096) - 7)
*/
- rc_sup = 5;
- if (rc_sup > ((qib_pcie_caps >> 4) & 7))
- rc_sup = (qib_pcie_caps >> 4) & 7;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ max_mrrs = 5;
+ if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
+ max_mrrs = (qib_pcie_caps >> 4) & 7;
+
+ max_mrrs = 128 << max_mrrs;
+ rc_mrrs = pcie_get_readrq(parent);
+ ep_mrrs = pcie_get_readrq(dd->pcidev);
+
+ if (max_mrrs > rc_mrrs) {
+ rc_mrrs = max_mrrs;
+ pcie_set_readrq(parent, rc_mrrs);
}
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (max_mrrs > ep_mrrs) {
+ ep_mrrs = max_mrrs;
+ pcie_set_readrq(dd->pcidev, ep_mrrs);
}
-bail:
- return ret;
}
/* End of PCIe capability tuning */
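The ffs()-based conversions above round-trip between the PCIe payload encoding and bytes: codes 0..5 map to 128 << code bytes, which is what pcie_get_mps()/pcie_set_mps() trade in. A worked check (helper names illustrative):

/* Sketch: MPS/MRRS code <-> bytes. */
static u16 mps_bytes_to_code(int bytes) /* 128 -> 0, 256 -> 1, ... */
{
        return ffs(bytes) - 8;
}

static int mps_code_to_bytes(u16 code)  /* 0 -> 128, 5 -> 4096 */
{
        return 128 << code;
}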
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 3cca55b51e5..7fcc150d603 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -585,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 3ab341320ea..2f2501890c4 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
qib_flush_wc();
qib_sendbuf_done(dd, pbufn);
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
goto done;
queue_ack:
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 357b6cfcd46..4c07a8b34ff 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d6c7fe7f88d..aaf7039f8ed 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
struct qib_sge *sge;
struct ib_wc wc;
u32 length;
+ enum ib_qp_type sqptype, dqptype;
qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
}
- if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+
+ sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : sqp->ibqp.qp_type;
+ dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : qp->ibqp.qp_type;
+
+ if (dqptype != sqptype ||
!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;
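GSI QPs are UD on the wire, so the loopback check now compares normalized types. A sketch of the mapping the hunk applies inline (loopback_type() is illustrative):

/* Sketch: treat GSI as UD when matching loopback peers. */
static enum ib_qp_type loopback_type(enum ib_qp_type t)
{
        return t == IB_QPT_GSI ? IB_QPT_UD : t;
}
/* loopback_type(sqp) == loopback_type(dqp) now admits GSI<->UD pairs */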
@@ -273,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
- ibp->n_multicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
/*
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d..d2806cae234 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -52,6 +52,17 @@
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+/*
+ * Track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+ struct rb_node node;
+ int refcount;
+ pid_t pid;
+};
+
struct qib_user_sdma_pkt {
struct list_head list; /* list element */
@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
/* dma page table */
struct rb_root dma_pages_root;
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
/* protect everything above... */
struct mutex lock;
};
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ sdma_rb_node = container_of(node,
+ struct qib_user_sdma_rb_node, node);
+ if (pid < sdma_rb_node->pid)
+ node = node->rb_left;
+ else if (pid > sdma_rb_node->pid)
+ node = node->rb_right;
+ else
+ return sdma_rb_node;
+ }
+ return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct qib_user_sdma_rb_node *got;
+
+ while (*node) {
+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ parent = *node;
+ if (new->pid < got->pid)
+ node = &((*node)->rb_left);
+ else if (new->pid > got->pid)
+ node = &((*node)->rb_right);
+ else
+ return 0;
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, root);
+ return 1;
+}
+
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
struct qib_user_sdma_queue *pq =
kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+ struct qib_user_sdma_rb_node *sdma_rb_node;
if (!pq)
goto done;
@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->num_pending = 0;
pq->num_sending = 0;
pq->added = 0;
+ pq->sdma_rb_node = NULL;
INIT_LIST_HEAD(&pq->sent);
spin_lock_init(&pq->sent_lock);
@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->dma_pages_root = RB_ROOT;
+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+ current->pid);
+ if (sdma_rb_node) {
+ sdma_rb_node->refcount++;
+ } else {
+ int ret;
+ sdma_rb_node = kmalloc(sizeof(*sdma_rb_node), GFP_KERNEL);
+ if (!sdma_rb_node)
+ goto err_rb;
+
+ sdma_rb_node->refcount = 1;
+ sdma_rb_node->pid = current->pid;
+
+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+ sdma_rb_node);
+ BUG_ON(ret == 0);
+ }
+ pq->sdma_rb_node = sdma_rb_node;
+
goto done;
+err_rb:
+ dma_pool_destroy(pq->header_cache);
err_slab:
kmem_cache_destroy(pq->pkt_slab);
err_kfree:
@@ -594,8 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
else
j = npages;
- ret = get_user_pages(current, current->mm, addr,
- j, 0, 1, pages, NULL);
+ ret = get_user_pages_fast(addr, j, 0, pages);
if (ret != j) {
i = 0;
j = ret;
@@ -1021,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
if (!pq)
return;
- kmem_cache_destroy(pq->pkt_slab);
+ pq->sdma_rb_node->refcount--;
+ if (pq->sdma_rb_node->refcount == 0) {
+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+ kfree(pq->sdma_rb_node);
+ }
dma_pool_destroy(pq->header_cache);
+ kmem_cache_destroy(pq->pkt_slab);
kfree(pq);
}
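The pid-keyed rb-tree is a refcount on concurrent queues per process, which push_pkts below uses to choose blocking vs non-blocking submission. Lifecycle sketch (rb helpers as defined above; the get/put names and trimmed error handling are illustrative):

/* Sketch: one rb node per pid, refcounted across queue create/destroy. */
static struct qib_user_sdma_rb_node *get_rb_node(pid_t pid)
{
        struct qib_user_sdma_rb_node *n =
                qib_user_sdma_rb_search(&qib_user_sdma_rb_root, pid);

        if (n) {
                n->refcount++;
                return n;
        }
        n = kmalloc(sizeof(*n), GFP_KERNEL);
        if (!n)
                return NULL;
        n->refcount = 1;
        n->pid = pid;
        qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, n);
        return n;
}

static void put_rb_node(struct qib_user_sdma_rb_node *n)
{
        if (--n->refcount == 0) {
                rb_erase(&n->node, &qib_user_sdma_rb_root);
                kfree(n);
        }
}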
@@ -1242,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
struct list_head *pktlist, int count)
{
- int ret = 0;
unsigned long flags;
if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
return -ECOMM;
- spin_lock_irqsave(&ppd->sdma_lock, flags);
-
- if (unlikely(!__qib_sdma_running(ppd))) {
- ret = -ECOMM;
- goto unlock;
+ /* non-blocking mode */
+ if (pq->sdma_rb_node->refcount > 1) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return 0;
}
+ /* In this case, descriptors from this process are not
+ * linked to the ppd pending queue and the interrupt handler
+ * will not update this process, so it is OK to modify
+ * num_pending without taking the sdma lock.
+ */
+
pq->num_pending += count;
- list_splice_tail_init(pktlist, &ppd->sdma_userpending);
- qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ /*
+ * Blocking mode for a single-rail process: we must
+ * release and re-acquire sdma_lock to give other
+ * processes a chance to make progress. This is
+ * important for performance.
+ */
+ do {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ qib_user_sdma_send_desc(ppd, pktlist);
+ if (!list_empty(pktlist))
+ qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ } while (!list_empty(pktlist));
-unlock:
- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
- return ret;
+ return 0;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -1291,14 +1400,11 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
qib_user_sdma_queue_clean(ppd, pq);
while (dim) {
- int mxp = 8;
+ int mxp = 1;
int ndesc = 0;
- down_write(&current->mm->mmap_sem);
ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
iov, dim, &list, &mxp, &ndesc);
- up_write(&current->mm->mmap_sem);
-
if (ret < 0)
goto done_unlock;
else {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 092b0bb1bb7..9bcfbd84298 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
if (mcast == NULL)
goto drop;
- ibp->n_multicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_multicast_rcv);
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
/*
@@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
&rcd->lookaside_qp->refcount))
wake_up(
&rcd->lookaside_qp->wait);
- rcd->lookaside_qp = NULL;
- }
+ rcd->lookaside_qp = NULL;
+ }
}
if (!rcd->lookaside_qp) {
qp = qib_lookup_qpn(ibp, qp_num);
@@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
rcd->lookaside_qpn = qp_num;
} else
qp = rcd->lookaside_qp;
- ibp->n_unicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_unicast_rcv);
qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
}
return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575a..bfc8948fdd3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
-} __attribute__ ((packed));
+} __packed;
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
struct qib_other_headers {
__be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
-} __attribute__ ((packed));
+} __packed;
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
} l;
struct qib_other_headers oth;
} u;
-} __attribute__ ((packed));
+} __packed;
struct qib_pio_header {
__le32 pbc[2];
struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
/*
* There is one struct qib_mcast for each multicast GID.
@@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx {
struct qib_opcode_stats stats[128];
};
+struct qib_pma_counters {
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+};
+
struct qib_ibport {
struct qib_qp __rcu *qp0;
struct qib_qp __rcu *qp1;
@@ -680,10 +687,11 @@ struct qib_ibport {
__be64 mkey;
__be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
u64 tid; /* TID for traps */
- u64 n_unicast_xmit; /* total unicast packets sent */
- u64 n_unicast_rcv; /* total unicast packets received */
- u64 n_multicast_xmit; /* total multicast packets sent */
- u64 n_multicast_rcv; /* total multicast packets received */
+ struct qib_pma_counters __percpu *pmastats;
+ u64 z_unicast_xmit; /* starting count for PMA */
+ u64 z_unicast_rcv; /* starting count for PMA */
+ u64 z_multicast_xmit; /* starting count for PMA */
+ u64 z_multicast_rcv; /* starting count for PMA */
u64 z_symbol_error_counter; /* starting count for PMA */
u64 z_link_error_recovery_counter; /* starting count for PMA */
u64 z_link_downed_counter; /* starting count for PMA */
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
new file mode 100644
index 00000000000..29ab11c34f3
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -0,0 +1,10 @@
+config INFINIBAND_USNIC
+ tristate "Verbs support for Cisco VIC"
+ depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+ select ENIC
+ select NET_VENDOR_CISCO
+ select PCI_IOV
+ select INFINIBAND_USER_ACCESS
+ ---help---
+ This is a low-level driver for Cisco's Virtual Interface
+ Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile
new file mode 100644
index 00000000000..99fb2db47cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Makefile
@@ -0,0 +1,15 @@
+ccflags-y := -Idrivers/net/ethernet/cisco/enic
+
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic_verbs.o
+
+usnic_verbs-y=\
+usnic_fwd.o \
+usnic_transport.o \
+usnic_uiom.o \
+usnic_uiom_interval_tree.o \
+usnic_vnic.o \
+usnic_ib_main.o \
+usnic_ib_qp_grp.o \
+usnic_ib_sysfs.o \
+usnic_ib_verbs.o \
+usnic_debugfs.o \
diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h
new file mode 100644
index 00000000000..5be13d8991b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_H_
+#define USNIC_H_
+
+#define DRV_NAME "usnic_verbs"
+
+#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */
+
+#define DRV_VERSION "1.0.3"
+#define DRV_RELDATE "December 19, 2013"
+
+#endif /* USNIC_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
new file mode 100644
index 00000000000..04a66229584
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef USNIC_ABI_H
+#define USNIC_ABI_H
+
+/* ABI between userspace and kernel */
+#define USNIC_UVERBS_ABI_VERSION 4
+
+#define USNIC_QP_GRP_MAX_WQS 8
+#define USNIC_QP_GRP_MAX_RQS 8
+#define USNIC_QP_GRP_MAX_CQS 16
+
+enum usnic_transport_type {
+ USNIC_TRANSPORT_UNKNOWN = 0,
+ USNIC_TRANSPORT_ROCE_CUSTOM = 1,
+ USNIC_TRANSPORT_IPV4_UDP = 2,
+ USNIC_TRANSPORT_MAX = 3,
+};
+
+struct usnic_transport_spec {
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ uint32_t sock_fd;
+ } udp;
+ };
+};
+
+struct usnic_ib_create_qp_cmd {
+ struct usnic_transport_spec spec;
+};
+
+/* TODO: Future - usnic_modify_qp needs to pass in generic filters */
+struct usnic_ib_create_qp_resp {
+ u32 vfid;
+ u32 qp_grp_id;
+ u64 bar_bus_addr;
+ u32 bar_len;
+/*
+ * WQ, RQ, CQ are explicitly specified because exposing a generic resources interface
+ * expands the scope of ABI to many files.
+ */
+ u32 wq_cnt;
+ u32 rq_cnt;
+ u32 cq_cnt;
+ u32 wq_idx[USNIC_QP_GRP_MAX_WQS];
+ u32 rq_idx[USNIC_QP_GRP_MAX_RQS];
+ u32 cq_idx[USNIC_QP_GRP_MAX_CQS];
+ u32 transport;
+ u32 reserved[9];
+};
+
+#endif /* USNIC_ABI_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
new file mode 100644
index 00000000000..39356726614
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_PKT_HDR_H
+#define USNIC_CMN_PKT_HDR_H
+
+#define USNIC_ROCE_ETHERTYPE (0x8915)
+#define USNIC_ROCE_GRH_VER (8)
+#define USNIC_PROTO_VER (1)
+#define USNIC_ROCE_GRH_VER_SHIFT (4)
+
+#endif /* USNIC_CMN_PKT_HDR_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
new file mode 100644
index 00000000000..9d737ed5e55
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_UTIL_H
+#define USNIC_CMN_UTIL_H
+
+static inline void
+usnic_mac_to_gid(const char *const mac, char *raw_gid)
+{
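+	/*
+	 * Build a link-local GID (fe80::/64): the interface ID is the MAC
+	 * in modified EUI-64 form, i.e. the universal/local bit of the
+	 * first octet is flipped and 0xff, 0xfe are inserted in the
+	 * middle of the address.
+	 */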
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 6);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
+{
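+	/*
+	 * Same modified-EUI-64 interface ID as above, but bytes 4-7 of
+	 * the GID carry the IPv4 address so that peers can recover it
+	 * from the GID.
+	 */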
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 2);
+ memcpy(&raw_gid[4], &inaddr, 4);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
+{
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+#endif /* USNIC_CMN_UTIL_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
new file mode 100644
index 00000000000..5d13860161a
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "usnic.h"
+#include "usnic_log.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_transport.h"
+
+static struct dentry *debugfs_root;
+static struct dentry *flows_dentry;
+
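+/*
+ * Layout: <debugfs root>/usnic_verbs/build-info reports the driver
+ * version, and flows/<flow_id> exposes one read-only file per flow.
+ */
+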
+static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ char buf[500];
+ int res;
+
+ if (*ppos > 0)
+ return 0;
+
+ res = scnprintf(buf, sizeof(buf),
+ "version: %s\n"
+ "build date: %s\n",
+ DRV_VERSION, DRV_RELDATE);
+
+ return simple_read_from_buffer(data, count, ppos, buf, res);
+}
+
+static const struct file_operations usnic_debugfs_buildinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = usnic_debugfs_buildinfo_read
+};
+
+static ssize_t flowinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ int n;
+ int left;
+ char *ptr;
+ char buf[512];
+
+ qp_flow = f->private_data;
+ ptr = buf;
+ left = count;
+
+ if (*ppos > 0)
+ return 0;
+
+ spin_lock(&qp_flow->qp_grp->lock);
+ n = scnprintf(ptr, left,
+ "QP Grp ID: %d Transport: %s ",
+ qp_flow->qp_grp->grp_id,
+ usnic_transport_to_str(qp_flow->trans_type));
+ UPDATE_PTR_LEFT(n, ptr, left);
+ if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ n = scnprintf(ptr, left, "Port_Num:%hu\n",
+ qp_flow->usnic_roce.port_num);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+ n = usnic_transport_sock_to_str(ptr, left,
+ qp_flow->udp.sock);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ spin_unlock(&qp_flow->qp_grp->lock);
+
+ return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
+}
+
+static const struct file_operations flowinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = flowinfo_read,
+};
+
+void usnic_debugfs_init(void)
+{
+ debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+ if (IS_ERR(debugfs_root)) {
+ usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
+ goto out_clear_root;
+ }
+
+ flows_dentry = debugfs_create_dir("flows", debugfs_root);
+ if (IS_ERR_OR_NULL(flows_dentry)) {
+ usnic_err("Failed to create debugfs flow dir with err %ld\n",
+ PTR_ERR(flows_dentry));
+ goto out_free_root;
+ }
+
+ debugfs_create_file("build-info", S_IRUGO, debugfs_root,
+ NULL, &usnic_debugfs_buildinfo_ops);
+ return;
+
+out_free_root:
+ debugfs_remove_recursive(debugfs_root);
+out_clear_root:
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_exit(void)
+{
+ if (!debugfs_root)
+ return;
+
+ debugfs_remove_recursive(debugfs_root);
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (IS_ERR_OR_NULL(flows_dentry))
+ return;
+
+ scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
+ "%u", qp_flow->flow->flow_id);
+ qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name,
+ S_IRUGO,
+ flows_dentry,
+ qp_flow,
+ &flowinfo_ops);
+ if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
+ usnic_err("Failed to create dbg fs entry for flow %u\n",
+ qp_flow->flow->flow_id);
+ }
+}
+
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+ debugfs_remove(qp_flow->dbgfs_dentry);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h
new file mode 100644
index 00000000000..4087d24a88f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef USNIC_DEBUGFS_H_
+#define USNIC_DEBUGFS_H_
+
+#include "usnic_ib_qp_grp.h"
+
+void usnic_debugfs_init(void);
+
+void usnic_debugfs_exit(void);
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
+
+#endif /* !USNIC_DEBUGFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
new file mode 100644
index 00000000000..e3c9bd9d3ba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "enic_api.h"
+#include "usnic_common_pkt_hdr.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+
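+/*
+ * Issue a device command on behalf of a VF. The command is proxied to
+ * the firmware through the parent enic netdevice; the caller must hold
+ * ufdev->lock.
+ */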
+static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0,
+ u64 *a1)
+{
+ int status;
+ struct net_device *netdev = ufdev->netdev;
+
+ lockdep_assert_held(&ufdev->lock);
+
+ status = enic_api_devcmd_proxy_by_index(netdev,
+ vnic_idx,
+ cmd,
+ a0, a1,
+ 1000);
+ if (status) {
+ if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
+ usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
+ ufdev->name, vnic_idx, cmd);
+ } else {
+ usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
+ ufdev->name, vnic_idx, cmd,
+ status);
+ }
+ } else {
+ usnic_dbg("Dev %s vnic idx %u cmd %u success",
+ ufdev->name, vnic_idx, cmd);
+ }
+
+ return status;
+}
+
+static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
+{
+ int status;
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
+ spin_unlock(&ufdev->lock);
+
+ return status;
+}
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
+{
+ struct usnic_fwd_dev *ufdev;
+
+ ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL);
+ if (!ufdev)
+ return NULL;
+
+ ufdev->pdev = pdev;
+ ufdev->netdev = pci_get_drvdata(pdev);
+ spin_lock_init(&ufdev->lock);
+ strncpy(ufdev->name, netdev_name(ufdev->netdev),
+ sizeof(ufdev->name) - 1);
+
+ return ufdev;
+}
+
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
+{
+ kfree(ufdev);
+}
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
+{
+ spin_lock(&ufdev->lock);
+ memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+ spin_unlock(&ufdev->lock);
+}
+
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
+{
+ int status;
+
+ spin_lock(&ufdev->lock);
+ if (ufdev->inaddr == 0) {
+ ufdev->inaddr = inaddr;
+ status = 0;
+ } else {
+ status = -EFAULT;
+ }
+ spin_unlock(&ufdev->lock);
+
+ return status;
+}
+
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->inaddr = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 1;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->mtu = mtu;
+ spin_unlock(&ufdev->lock);
+}
+
+static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
+{
+ lockdep_assert_held(&ufdev->lock);
+
+ if (!ufdev->link_up)
+ return -EPERM;
+
+ return 0;
+}
+
+static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
+ struct filter *filter)
+{
+ lockdep_assert_held(&ufdev->lock);
+
+ if (filter->type == FILTER_IPV4_5TUPLE) {
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
+ return -EACCES;
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
+ return -EBUSY;
+ else if (ufdev->inaddr == 0)
+ return -EINVAL;
+ else if (filter->u.ipv4.dst_port == 0)
+ return -ERANGE;
+ else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
+ return -EFAULT;
+ else
+ return 0;
+ }
+
+ return 0;
+}
+
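+/*
+ * Write two TLVs back to back into the devcmd buffer: first the filter
+ * to match on, then the action the firmware applies to matching
+ * packets.
+ */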
+static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
+ struct filter_action *action)
+{
+ tlv->type = CLSF_TLV_FILTER;
+ tlv->length = sizeof(struct filter);
+ *((struct filter *)&tlv->val) = *filter;
+
+ tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
+ sizeof(struct filter));
+ tlv->type = CLSF_TLV_ACTION;
+ tlv->length = sizeof(struct filter_action);
+ *((struct filter_action *)&tlv->val) = *action;
+}
+
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *uaction)
+{
+ struct filter_tlv *tlv;
+ struct pci_dev *pdev;
+ struct usnic_fwd_flow *flow;
+ uint64_t a0, a1;
+ uint64_t tlv_size;
+ dma_addr_t tlv_pa;
+ int status;
+
+ pdev = ufdev->pdev;
+ tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
+ sizeof(struct filter_action));
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ if (!tlv) {
+ usnic_err("Failed to allocate memory\n");
+ status = -ENOMEM;
+ goto out_free_flow;
+ }
+
+ fill_tlv(tlv, filter, &uaction->action);
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_dev_ready_locked(ufdev);
+ if (status) {
+ usnic_err("Forwarding dev %s not ready with status %d\n",
+ ufdev->name, status);
+ goto out_free_tlv;
+ }
+
+ status = validate_filter_locked(ufdev, filter);
+ if (status) {
+ usnic_err("Failed to validate filter with status %d\n",
+ status);
+ goto out_free_tlv;
+ }
+
+ /* Issue Devcmd */
+ a0 = tlv_pa;
+ a1 = tlv_size;
+ status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
+ CMD_ADD_FILTER, &a0, &a1);
+ if (status) {
+ usnic_err("VF %s Filter add failed with status:%d",
+ ufdev->name, status);
+ status = -EFAULT;
+ goto out_free_tlv;
+ } else {
+ usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
+ }
+
+ flow->flow_id = (uint32_t) a0;
+ flow->vnic_idx = uaction->vnic_idx;
+ flow->ufdev = ufdev;
+
+out_free_tlv:
+ spin_unlock(&ufdev->lock);
+ pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ if (!status)
+ return flow;
+out_free_flow:
+ kfree(flow);
+ return ERR_PTR(status);
+}
+
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
+{
+ int status;
+ u64 a0, a1;
+
+ a0 = flow->flow_id;
+
+ status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
+ CMD_DEL_FILTER, &a0, &a1);
+ if (status) {
+ if (status == ERR_EINVAL) {
+ usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
+ flow->flow_id, flow->vnic_idx,
+ flow->ufdev->name, status);
+ } else {
+ usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id, status);
+ }
+		/*
+		 * Log the error and fake success to the caller because if
+		 * a flow fails to be deleted in the firmware, it is an
+		 * unrecoverable error.
+		 */
+		status = 0;
+ } else {
+ usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id);
+ }
+
+ kfree(flow);
+ return status;
+}
+
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ struct net_device *pf_netdev;
+ u64 a0, a1;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
+ netdev_name(pf_netdev),
+ vnic_idx, qp_idx);
+ }
+
+ return status;
+}
+
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ u64 a0, a1;
+ struct net_device *pf_netdev;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx);
+ }
+
+ return status;
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
new file mode 100644
index 00000000000..93713a2230b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_FWD_H_
+#define USNIC_FWD_H_
+
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/in.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_pkt_hdr.h"
+#include "vnic_devcmd.h"
+
+struct usnic_fwd_dev {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ spinlock_t lock;
+ /*
+ * The following fields can be read directly off the device.
+ * However, they should be set by an accessor function, except name,
+ * which cannot be changed.
+ */
+ bool link_up;
+ char mac[ETH_ALEN];
+ unsigned int mtu;
+ __be32 inaddr;
+ char name[IFNAMSIZ+1];
+};
+
+struct usnic_fwd_flow {
+ uint32_t flow_id;
+ struct usnic_fwd_dev *ufdev;
+ unsigned int vnic_idx;
+};
+
+struct usnic_filter_action {
+ int vnic_idx;
+ struct filter_action action;
+};
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
+
+/*
+ * Allocate a flow on this forwarding device. Whoever calls this function
+ * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
+ * NETDEV_DOWN is seen, the flow will no longer function and must be
+ * immediately freed by calling usnic_fwd_dealloc_flow.
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *action);
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+
+static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
+ uint32_t usnic_id)
+{
+ filter->type = FILTER_USNIC_ID;
+ filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+ filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
+ FILTER_FIELD_USNIC_ID |
+ FILTER_FIELD_USNIC_PROTO;
+ filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER <<
+ USNIC_ROCE_GRH_VER_SHIFT) |
+ USNIC_PROTO_VER;
+ filter->u.usnic.usnic_id = usnic_id;
+}
+
+static inline void usnic_fwd_init_udp_filter(struct filter *filter,
+ uint32_t daddr, uint16_t dport)
+{
+ filter->type = FILTER_IPV4_5TUPLE;
+ filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;
+ filter->u.ipv4.protocol = PROTO_UDP;
+
+ if (daddr) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
+ filter->u.ipv4.dst_addr = daddr;
+ }
+
+ if (dport) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
+ filter->u.ipv4.dst_port = dport;
+ }
+}
+
+#endif /* !USNIC_FWD_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h
new file mode 100644
index 00000000000..e5a9297dd1b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_H_
+#define USNIC_IB_H_
+
+#include <linux/iommu.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "usnic.h"
+#include "usnic_abi.h"
+#include "usnic_vnic.h"
+
+#define USNIC_IB_PORT_CNT 1
+#define USNIC_IB_NUM_COMP_VECTORS 1
+
+extern unsigned int usnic_ib_share_vf;
+
+struct usnic_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ /* Protected by usnic_ib_dev->usdev_lock */
+ struct list_head qp_grp_list;
+ struct list_head link;
+};
+
+struct usnic_ib_pd {
+ struct ib_pd ibpd;
+ struct usnic_uiom_pd *umem_pd;
+};
+
+struct usnic_ib_mr {
+ struct ib_mr ibmr;
+ struct usnic_uiom_reg *umem;
+};
+
+struct usnic_ib_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ struct usnic_fwd_dev *ufdev;
+ struct list_head ib_dev_link;
+ struct list_head vf_dev_list;
+ struct list_head ctx_list;
+ struct mutex usdev_lock;
+
+ /* provisioning information */
+ struct kref vf_cnt;
+ unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
+
+ /* sysfs vars for QPN reporting */
+ struct kobject *qpn_kobj;
+};
+
+struct usnic_ib_vf {
+ struct usnic_ib_dev *pf;
+ spinlock_t lock;
+ struct usnic_vnic *vnic;
+ unsigned int qp_grp_ref_cnt;
+ struct usnic_ib_pd *pd;
+ struct list_head link;
+};
+
+static inline
+struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct usnic_ib_dev, ib_dev);
+}
+
+static inline
+struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct usnic_ib_pd, ibpd);
+}
+
+static inline
+struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct usnic_ib_mr, ibmr);
+}
+void usnic_ib_log_vf(struct usnic_ib_vf *vf);
+
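+/*
+ * After scnprintf() has written N bytes, advance the buffer pointer P
+ * and shrink the remaining length L accordingly.
+ */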
+#define UPDATE_PTR_LEFT(N, P, L) \
+do { \
+ L -= (N); \
+ P += (N); \
+} while (0)
+
+#endif /* USNIC_IB_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
new file mode 100644
index 00000000000..fb6d026f92c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Upinder Malhi <umalhi@cisco.com>
+ * Author: Anant Deepak <anadeepa@cisco.com>
+ * Author: Cesare Cantu' <cantuc@cisco.com>
+ * Author: Jeff Squyres <jsquyres@cisco.com>
+ * Author: Kiran Thirumalai <kithirum@cisco.com>
+ * Author: Xuyang Wang <xuywang@cisco.com>
+ * Author: Reese Faucette <rfaucett@cisco.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_log.h"
+#include "usnic_fwd.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_transport.h"
+#include "usnic_uiom.h"
+#include "usnic_ib_sysfs.h"
+
+unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
+unsigned int usnic_ib_share_vf = 1;
+
+static const char usnic_version[] =
+ DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
+static LIST_HEAD(usnic_ib_ibdev_list);
+
+/* Callback dump funcs */
+static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_vf *vf = obj;
+ return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+}
+/* End callback dump funcs */
+
+static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
+{
+ usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
+ usnic_ib_dump_vf_hdr,
+ usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
+}
+
+void usnic_ib_log_vf(struct usnic_ib_vf *vf)
+{
+ char buf[1000];
+ usnic_ib_dump_vf(vf, buf, sizeof(buf));
+ usnic_dbg("%s\n", buf);
+}
+
+/* Start of netdev section */
+static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
+{
+ const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN",
+ "NETDEV_REBOOT", "NETDEV_CHANGE",
+ "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU",
+ "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE",
+ "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP",
+ "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE",
+ "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE",
+ "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN"
+ };
+
+ if (event >= ARRAY_SIZE(event2str))
+ return "UNKNOWN_NETDEV_EVENT";
+ else
+ return event2str[event];
+}
+
+static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
+{
+ struct usnic_ib_ucontext *ctx;
+ struct usnic_ib_qp_grp *qp_grp;
+ enum ib_qp_state cur_state;
+ int status;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
+ list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
+ cur_state = qp_grp->state;
+ if (cur_state == IB_QPS_INIT ||
+ cur_state == IB_QPS_RTR ||
+ cur_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp,
+ IB_QPS_ERR,
+ NULL);
+ if (status) {
+					usnic_err("Failed to transition qp grp %u from %s to %s\n",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string
+ (cur_state),
+ usnic_ib_qp_grp_state_to_string
+ (IB_QPS_ERR));
+ }
+ }
+ }
+ }
+}
+
+static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event)
+{
+ struct net_device *netdev;
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ netdev = us_ibdev->netdev;
+ switch (event) {
+ case NETDEV_REBOOT:
+ usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ case NETDEV_CHANGEADDR:
+ if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
+ sizeof(us_ibdev->ufdev->mac))) {
+ usnic_dbg("Ignoring addr change on %s\n",
+ us_ibdev->ib_dev.name);
+ } else {
+			usnic_info("%s old mac: %pM new mac: %pM\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mac,
+ netdev->dev_addr);
+ usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ }
+
+ break;
+ case NETDEV_CHANGEMTU:
+ if (us_ibdev->ufdev->mtu != netdev->mtu) {
+ usnic_info("MTU Change on %s old: %u new: %u\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mtu, netdev->mtu);
+ usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ } else {
+ usnic_dbg("Ignoring MTU change on %s\n",
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ default:
+ usnic_dbg("Ignoring event %s on %s",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
+static int usnic_ib_netdevice_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_usdev_event(us_ibdev, event);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block usnic_ib_netdevice_notifier = {
+ .notifier_call = usnic_ib_netdevice_event
+};
+/* End of netdev section */
+
+/* Start of inet section */
+static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct ib_event ib_event;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+
+ switch (event) {
+ case NETDEV_DOWN:
+ usnic_info("%s via ip notifiers",
+ usnic_ib_netdev_event_to_string(event));
+ usnic_fwd_del_ipaddr(us_ibdev->ufdev);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
+ usnic_info("%s via ip notifiers: ip %pI4",
+ usnic_ib_netdev_event_to_string(event),
+ &us_ibdev->ufdev->inaddr);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ default:
+ usnic_info("Ignoring event %s on %s",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block usnic_ib_inetaddr_notifier = {
+ .notifier_call = usnic_ib_inetaddr_event
+};
+/* End of inet section*/
+
+/* Start of PF discovery section */
+static void *usnic_ib_device_add(struct pci_dev *dev)
+{
+ struct usnic_ib_dev *us_ibdev;
+ union ib_gid gid;
+ struct in_ifaddr *in;
+ struct net_device *netdev;
+
+ usnic_dbg("\n");
+ netdev = pci_get_drvdata(dev);
+
+ us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+ if (IS_ERR_OR_NULL(us_ibdev)) {
+ usnic_err("Device %s context alloc failed\n",
+ netdev_name(pci_get_drvdata(dev)));
+ return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+ }
+
+ us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
+ if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
+ usnic_err("Failed to alloc ufdev for %s with err %ld\n",
+ pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+ goto err_dealloc;
+ }
+
+ mutex_init(&us_ibdev->usdev_lock);
+ INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
+ INIT_LIST_HEAD(&us_ibdev->ctx_list);
+
+ us_ibdev->pdev = dev;
+ us_ibdev->netdev = pci_get_drvdata(dev);
+ us_ibdev->ib_dev.owner = THIS_MODULE;
+ us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
+ us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
+ us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
+ us_ibdev->ib_dev.dma_device = &dev->dev;
+ us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
+ strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
+
+ us_ibdev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+ us_ibdev->ib_dev.query_device = usnic_ib_query_device;
+ us_ibdev->ib_dev.query_port = usnic_ib_query_port;
+ us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
+ us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
+ us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
+ us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
+ us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
+ us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
+ us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
+ us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
+ us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
+ us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
+ us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
+ us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
+ us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
+ us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
+ us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
+ us_ibdev->ib_dev.mmap = usnic_ib_mmap;
+ us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
+ us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
+ us_ibdev->ib_dev.post_send = usnic_ib_post_send;
+ us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
+ us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
+ us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
+ us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
+
+ if (ib_register_device(&us_ibdev->ib_dev, NULL))
+ goto err_fwd_dealloc;
+
+ usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
+ usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr);
+ if (netif_carrier_ok(us_ibdev->netdev))
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+
+ in = ((struct in_device *)(netdev->ip_ptr))->ifa_list;
+ if (in != NULL)
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address);
+
+ usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr,
+ us_ibdev->ufdev->inaddr, &gid.raw[0]);
+ memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ kref_init(&us_ibdev->vf_cnt);
+
+ usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
+ us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
+ us_ibdev->ufdev->mtu);
+ return us_ibdev;
+
+err_fwd_dealloc:
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+err_dealloc:
+	usnic_err("failed -- deallocating device\n");
+ ib_dealloc_device(&us_ibdev->ib_dev);
+ return NULL;
+}
+
+static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
+{
+ usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_sysfs_unregister_usdev(us_ibdev);
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+ ib_unregister_device(&us_ibdev->ib_dev);
+ ib_dealloc_device(&us_ibdev->ib_dev);
+}
+
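+/*
+ * kref release callback: runs once the last VF referencing this PF has
+ * dropped its reference, at which point the ibdev can be torn down.
+ */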
+static void usnic_ib_undiscover_pf(struct kref *kref)
+{
+ struct usnic_ib_dev *us_ibdev, *tmp;
+ struct pci_dev *dev;
+ bool found = false;
+
+ dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry_safe(us_ibdev, tmp,
+ &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == dev) {
+ list_del(&us_ibdev->ib_dev_link);
+ usnic_ib_device_remove(us_ibdev);
+ found = true;
+ break;
+ }
+ }
+
+ WARN(!found, "Failed to remove PF %s\n", pci_name(dev));
+
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+}
+
+static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct pci_dev *parent_pci, *vf_pci;
+ int err;
+
+ vf_pci = usnic_vnic_get_pdev(vnic);
+ parent_pci = pci_physfn(vf_pci);
+
+ BUG_ON(!parent_pci);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == parent_pci) {
+ kref_get(&us_ibdev->vf_cnt);
+ goto out;
+ }
+ }
+
+ us_ibdev = usnic_ib_device_add(parent_pci);
+ if (IS_ERR_OR_NULL(us_ibdev)) {
+ us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+ goto out;
+ }
+
+ err = usnic_ib_sysfs_register_usdev(us_ibdev);
+ if (err) {
+ usnic_ib_device_remove(us_ibdev);
+ us_ibdev = ERR_PTR(err);
+ goto out;
+ }
+
+ list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list);
+out:
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+ return us_ibdev;
+}
+/* End of PF discovery section */
+
+/* Start of PCI section */
+
+static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
+ {0,}
+};
+
+static int usnic_ib_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int err;
+ struct usnic_ib_dev *pf;
+ struct usnic_ib_vf *vf;
+ enum usnic_vnic_res_type res_type;
+
+ vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+ if (!vf)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ usnic_err("Failed to enable %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_clean_vf;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ usnic_err("Failed to request region for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_disable_device;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, vf);
+
+ vf->vnic = usnic_vnic_alloc(pdev);
+ if (IS_ERR_OR_NULL(vf->vnic)) {
+ err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
+ usnic_err("Failed to alloc vnic for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_release_regions;
+ }
+
+ pf = usnic_ib_discover_pf(vf->vnic);
+ if (IS_ERR_OR_NULL(pf)) {
+		usnic_err("Failed to discover pf of vnic %s with err %ld\n",
+ pci_name(pdev), PTR_ERR(pf));
+ err = pf ? PTR_ERR(pf) : -EFAULT;
+ goto out_clean_vnic;
+ }
+
+ vf->pf = pf;
+ spin_lock_init(&vf->lock);
+ mutex_lock(&pf->usdev_lock);
+ list_add_tail(&vf->link, &pf->vf_dev_list);
+	/*
+	 * Save max settings (they will be the same for each VF; easier to
+	 * re-write them every time than to say
+	 * "if (!set) { set_values(); set = 1; }").
+	 */
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL+1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
+ res_type);
+ }
+
+ mutex_unlock(&pf->usdev_lock);
+
+ usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
+ pf->ib_dev.name);
+ usnic_ib_log_vf(vf);
+ return 0;
+
+out_clean_vnic:
+ usnic_vnic_free(vf->vnic);
+out_release_regions:
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+out_disable_device:
+ pci_disable_device(pdev);
+out_clean_vf:
+ kfree(vf);
+ return err;
+}
+
+static void usnic_ib_pci_remove(struct pci_dev *pdev)
+{
+ struct usnic_ib_vf *vf = pci_get_drvdata(pdev);
+ struct usnic_ib_dev *pf = vf->pf;
+
+ mutex_lock(&pf->usdev_lock);
+ list_del(&vf->link);
+ mutex_unlock(&pf->usdev_lock);
+
+ kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf);
+ usnic_vnic_free(vf->vnic);
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(vf);
+
+ usnic_info("Removed VF %s\n", pci_name(pdev));
+}
+
+/* PCI driver entry points */
+static struct pci_driver usnic_ib_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = usnic_ib_pci_ids,
+ .probe = usnic_ib_pci_probe,
+ .remove = usnic_ib_pci_remove,
+};
+/* End of PCI section */
+
+/* Start of module section */
+static int __init usnic_ib_init(void)
+{
+ int err;
+
+ printk_once(KERN_INFO "%s", usnic_version);
+
+ err = usnic_uiom_init(DRV_NAME);
+ if (err) {
+		usnic_err("Unable to initialize umem with err %d\n", err);
+ return err;
+ }
+
+ if (pci_register_driver(&usnic_ib_pci_driver)) {
+ usnic_err("Unable to register with PCI\n");
+ goto out_umem_fini;
+ }
+
+ err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ if (err) {
+ usnic_err("Failed to register netdev notifier\n");
+ goto out_pci_unreg;
+ }
+
+ err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ if (err) {
+ usnic_err("Failed to register inet addr notifier\n");
+ goto out_unreg_netdev_notifier;
+ }
+
+ err = usnic_transport_init();
+ if (err) {
+ usnic_err("Failed to initialize transport\n");
+ goto out_unreg_inetaddr_notifier;
+ }
+
+ usnic_debugfs_init();
+
+ return 0;
+
+out_unreg_inetaddr_notifier:
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+out_unreg_netdev_notifier:
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+out_pci_unreg:
+ pci_unregister_driver(&usnic_ib_pci_driver);
+out_umem_fini:
+ usnic_uiom_fini();
+
+ return err;
+}
+
+static void __exit usnic_ib_destroy(void)
+{
+ usnic_dbg("\n");
+ usnic_debugfs_exit();
+ usnic_transport_fini();
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ pci_unregister_driver(&usnic_ib_pci_driver);
+ usnic_uiom_fini();
+}
+
+MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
+MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
+module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usnic_log_lvl, "Off=0, Err=1, Info=2, Debug=3");
+MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
+MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
+
+module_init(usnic_ib_init);
+module_exit(usnic_ib_destroy);
+/* End of module section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
new file mode 100644
index 00000000000..f8dfd76be89
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+#include "usnic_fwd.h"
+#include "usnic_uiom.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_transport.h"
+
+#define DFLT_RQ_IDX 0
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return "Rst";
+ case IB_QPS_INIT:
+ return "Init";
+ case IB_QPS_RTR:
+ return "RTR";
+ case IB_QPS_RTS:
+ return "RTS";
+ case IB_QPS_SQD:
+ return "SQD";
+ case IB_QPS_SQE:
+ return "SQE";
+ case IB_QPS_ERR:
+ return "ERR";
+ default:
+		return "UNKNOWN STATE";
+ }
+}
+
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
+{
+ return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID");
+}
+
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_qp_grp *qp_grp = obj;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ if (obj) {
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(
+ qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic),
+ default_flow->flow->flow_id);
+ } else {
+ return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");
+ }
+}
+
+static struct usnic_vnic_res_chunk *
+get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
+{
+ lockdep_assert_held(&qp_grp->lock);
+	/*
+	 * The QP res chunk, used to derive qp indices,
+	 * is just the chunk of RQ indices.
+	 */
+ return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+}
+
+static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+ int status;
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+
+ lockdep_assert_held(&qp_grp->lock);
+
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+			usnic_err("Failed to enable qp %d of %s:%d with err %d\n",
+ res->vnic_idx, qp_grp->ufdev->name,
+ vnic_idx, status);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (i--; i >= 0; i--) {
+ res = res_chunk->res[i];
+ usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ }
+
+ return status;
+}
+
+static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+ int status = 0;
+
+ lockdep_assert_held(&qp_grp->lock);
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+			usnic_err("Failed to disable rq %d of %s:%d with err %d\n",
+ res->vnic_idx,
+ qp_grp->ufdev->name,
+ vnic_idx, status);
+ }
+ }
+
+ return status;
+}
+
+static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_filter_action *uaction)
+{
+ struct usnic_vnic_res_chunk *res_chunk;
+
+ res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get %s with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ uaction->action.type = FILTER_ACTION_RQ_STEERING;
+ uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx;
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ uint16_t port_num;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ port_num = trans_spec->usnic_roce.port_num;
+
+ /* Reserve Port */
+ port_num = usnic_transport_rsrv_port(trans_type, port_num);
+ if (port_num == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* Create Flow */
+ usnic_fwd_init_usnic_filter(&filter, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_unreserve_port;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+		usnic_err("Failed to alloc flow with err %ld\n",
+ PTR_ERR(flow));
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_unreserve_port;
+ }
+
+ /* Create Flow Handle */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->usnic_roce.port_num = port_num;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_unreserve_port:
+ usnic_transport_unrsrv_port(trans_type, port_num);
+ return ERR_PTR(err);
+}
+
+static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_unrsrv_port(qp_flow->trans_type,
+ qp_flow->usnic_roce.port_num);
+ kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct socket *sock;
+ int sock_fd;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ uint32_t addr;
+ uint16_t port_num;
+ int proto;
+
+ trans_type = trans_spec->trans_type;
+ sock_fd = trans_spec->udp.sock_fd;
+
+ /* Get and check socket */
+ sock = usnic_transport_get_socket(sock_fd);
+ if (IS_ERR_OR_NULL(sock))
+ return ERR_CAST(sock);
+
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num);
+ if (err)
+ goto out_put_sock;
+
+ if (proto != IPPROTO_UDP) {
+ usnic_err("Protocol for fd %d is not UDP", sock_fd);
+ err = -EPERM;
+ goto out_put_sock;
+ }
+
+ /* Create flow */
+ usnic_fwd_init_udp_filter(&filter, addr, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_put_sock;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+		usnic_err("Failed to alloc flow with err %ld\n",
+ PTR_ERR(flow));
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_put_sock;
+ }
+
+ /* Create qp_flow */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->udp.sock = sock;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_put_sock:
+ usnic_transport_put_socket(sock);
+ return ERR_PTR(err);
+}
+
+static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_put_socket(qp_flow->udp.sock);
+ kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ qp_flow = create_roce_custom_flow(qp_grp, trans_spec);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ qp_flow = create_udp_flow(qp_grp, trans_spec);
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n",
+ trans_spec->trans_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!IS_ERR_OR_NULL(qp_flow)) {
+ list_add_tail(&qp_flow->link, &qp_grp->flows_lst);
+ usnic_debugfs_flow_add(qp_flow);
+ }
+
+ return qp_flow;
+}
+
+static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_debugfs_flow_remove(qp_flow);
+ list_del(&qp_flow->link);
+
+ switch (qp_flow->trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ release_roce_custom_flow(qp_flow);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ release_udp_flow(qp_flow);
+ break;
+ default:
+ WARN(1, "Unsupported transport %u\n",
+ qp_flow->trans_type);
+ break;
+ }
+}
+
+static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow, *tmp;
+ list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link)
+ release_and_remove_flow(qp_flow);
+}
+
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data)
+{
+ int status = 0;
+ int vnic_idx;
+ struct ib_event ib_event;
+ enum ib_qp_state old_state;
+ struct usnic_transport_spec *trans_spec;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ old_state = qp_grp->state;
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ trans_spec = (struct usnic_transport_spec *) data;
+
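+	/*
+	 * Validate the old_state -> new_state transition and carry out its
+	 * side effects: flows are created going into INIT, the RQs are
+	 * enabled going into RTR, and everything is torn down on a move
+	 * to RESET or ERR.
+	 */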
+ spin_lock(&qp_grp->lock);
+ switch (new_state) {
+ case IB_QPS_RESET:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ /* NO-OP */
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ status = 0;
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ case IB_QPS_ERR:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_INIT:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Optional to specify filters.
+ */
+ status = 0;
+ }
+ break;
+ case IB_QPS_INIT:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Doesn't make sense to go into INIT state
+ * from INIT state w/o adding filters.
+ */
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ status = disable_qp_grp(qp_grp);
+ break;
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ switch (old_state) {
+ case IB_QPS_INIT:
+ status = enable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTS:
+ switch (old_state) {
+ case IB_QPS_RTR:
+ /* NO-OP FOR NOW */
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_ERR:
+ ib_event.device = &qp_grp->vf->pf->ib_dev;
+ ib_event.element.qp = &qp_grp->ibqp;
+ ib_event.event = IB_EVENT_QP_FATAL;
+
+ switch (old_state) {
+ case IB_QPS_RESET:
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ default:
+ status = -EINVAL;
+ }
+ spin_unlock(&qp_grp->lock);
+
+ if (!status) {
+ qp_grp->state = new_state;
+ usnic_info("Transistioned %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ } else {
+ usnic_err("Failed to transistion %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ }
+
+ return status;
+}
+
+static struct usnic_vnic_res_chunk**
+alloc_res_chunk_list(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec, void *owner_obj)
+{
+ enum usnic_vnic_res_type res_type;
+ struct usnic_vnic_res_chunk **res_chunk_list;
+ int err, i, res_cnt, res_lst_sz;
+
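+	/*
+	 * Count the resources in the EOL-terminated spec; the chunk list is
+	 * allocated with one extra slot so it stays NULL-terminated for
+	 * free_qp_grp_res() and usnic_ib_qp_grp_get_chunk().
+	 */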
+ for (res_lst_sz = 0;
+ res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
+ res_lst_sz++) {
+ /* Do Nothing */
+ }
+
+ res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1),
+ GFP_ATOMIC);
+ if (!res_chunk_list)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
+ i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
+ res_cnt, owner_obj);
+ if (IS_ERR_OR_NULL(res_chunk_list[i])) {
+ err = res_chunk_list[i] ?
+ PTR_ERR(res_chunk_list[i]) : -ENOMEM;
+ usnic_err("Failed to get %s from %s with err %d\n",
+ usnic_vnic_res_type_to_str(res_type),
+ usnic_vnic_pci_name(vnic),
+ err);
+ goto out_free_res;
+ }
+ }
+
+ return res_chunk_list;
+
+out_free_res:
+ for (i--; i > 0; i--)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+ return ERR_PTR(err);
+}
+
+static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
+{
+ int i;
+ for (i = 0; res_chunk_list[i]; i++)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+}
+
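+/*
+ * The first QP group bound to a VF attaches the VF's PCI device to the
+ * PD's IOMMU domain; later groups only bump the refcount, so a VF serves
+ * at most one PD at a time (the WARN_ON below enforces this).
+ */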
+static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_ib_qp_grp *qp_grp)
+{
+ int err;
+ struct pci_dev *pdev;
+
+ lockdep_assert_held(&vf->lock);
+
+ pdev = usnic_vnic_get_pdev(vf->vnic);
+ if (vf->qp_grp_ref_cnt == 0) {
+ err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev);
+ if (err) {
+ usnic_err("Failed to attach %s to domain\n",
+ pci_name(pdev));
+ return err;
+ }
+ vf->pd = pd;
+ }
+ vf->qp_grp_ref_cnt++;
+
+ WARN_ON(vf->pd != pd);
+ qp_grp->vf = vf;
+
+ return 0;
+}
+
+static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct pci_dev *pdev;
+ struct usnic_ib_pd *pd;
+
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ pd = qp_grp->vf->pd;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (--qp_grp->vf->qp_grp_ref_cnt == 0) {
+ qp_grp->vf->pd = NULL;
+ usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev);
+ }
+ qp_grp->vf = NULL;
+}
+
+static void log_spec(struct usnic_vnic_res_spec *res_spec)
+{
+ char buf[512];
+ usnic_vnic_spec_dump(buf, sizeof(buf), res_spec);
+ usnic_dbg("%s\n", buf);
+}
+
+static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
+ uint32_t *id)
+{
+ enum usnic_transport_type trans_type = qp_flow->trans_type;
+ int err;
+ uint16_t port_num = 0;
+
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ *id = qp_flow->usnic_roce.port_num;
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ err = usnic_transport_sock_get_addr(qp_flow->udp.sock,
+ NULL, NULL,
+ &port_num);
+ if (err)
+ return err;
+ /*
+ * Copy port_num to stack first and then to *id,
+ * so that the short to int cast works for little
+ * and big endian systems.
+ */
+ *id = port_num;
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n", trans_type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *transport_spec)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int err;
+ enum usnic_transport_type transport = transport_spec->trans_type;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ lockdep_assert_held(&vf->lock);
+
+ err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
+ res_spec);
+ if (err) {
+ usnic_err("Spec does not meet miniumum req for transport %d\n",
+ transport);
+ log_spec(res_spec);
+ return ERR_PTR(err);
+ }
+
+ qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
+ if (!qp_grp) {
+ usnic_err("Unable to alloc qp_grp - Out of memory\n");
+ return NULL;
+ }
+
+ qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
+ qp_grp);
+ if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
+ err = qp_grp->res_chunk_list ?
+ PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
+ usnic_err("Unable to alloc res for %d with err %d\n",
+ qp_grp->grp_id, err);
+ goto out_free_qp_grp;
+ }
+
+ err = qp_grp_and_vf_bind(vf, pd, qp_grp);
+ if (err)
+ goto out_free_res;
+
+ INIT_LIST_HEAD(&qp_grp->flows_lst);
+ spin_lock_init(&qp_grp->lock);
+ qp_grp->ufdev = ufdev;
+ qp_grp->state = IB_QPS_RESET;
+ qp_grp->owner_pid = current->pid;
+
+ qp_flow = create_and_add_flow(qp_grp, transport_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ usnic_err("Unable to create and add flow with err %ld\n",
+ PTR_ERR(qp_flow));
+ err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ goto out_qp_grp_vf_unbind;
+ }
+
+ err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
+ if (err)
+ goto out_release_flow;
+ qp_grp->ibqp.qp_num = qp_grp->grp_id;
+
+ usnic_ib_sysfs_qpn_add(qp_grp);
+
+ return qp_grp;
+
+out_release_flow:
+ release_and_remove_flow(qp_flow);
+out_qp_grp_vf_unbind:
+ qp_grp_and_vf_unbind(qp_grp);
+out_free_res:
+ free_qp_grp_res(qp_grp->res_chunk_list);
+out_free_qp_grp:
+ kfree(qp_grp);
+
+ return ERR_PTR(err);
+}
+
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ release_and_remove_all_flows(qp_grp);
+ usnic_ib_sysfs_qpn_remove(qp_grp);
+ qp_grp_and_vf_unbind(qp_grp);
+ free_qp_grp_res(qp_grp->res_chunk_list);
+ kfree(qp_grp);
+}
+
+struct usnic_vnic_res_chunk*
+usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type res_type)
+{
+ int i;
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ if (qp_grp->res_chunk_list[i]->type == res_type)
+ return qp_grp->res_chunk_list[i];
+ }
+
+ return ERR_PTR(-EINVAL);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
new file mode 100644
index 00000000000..b0aafe8db0c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_QP_GRP_H_
+#define USNIC_IB_QP_GRP_H_
+
+#include <linux/debugfs.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_ib.h"
+#include "usnic_abi.h"
+#include "usnic_fwd.h"
+#include "usnic_vnic.h"
+
+/*
+ * The qp group struct represents all the hw resources needed to present an ib_qp
+ */
+struct usnic_ib_qp_grp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+ int grp_id;
+
+ struct usnic_fwd_dev *ufdev;
+ struct usnic_ib_ucontext *ctx;
+ struct list_head flows_lst;
+
+ struct usnic_vnic_res_chunk **res_chunk_list;
+
+ pid_t owner_pid;
+ struct usnic_ib_vf *vf;
+ struct list_head link;
+
+ spinlock_t lock;
+
+ struct kobject kobj;
+};
+
+struct usnic_ib_qp_grp_flow {
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ struct socket *sock;
+ } udp;
+ };
+ struct usnic_ib_qp_grp *qp_grp;
+ struct list_head link;
+
+ /* Debug FS */
+ struct dentry *dbgfs_dentry;
+ char dentry_name[32];
+};
+
+static const struct
+usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
+ { /*USNIC_TRANSPORT_UNKNOWN*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_IPV4_UDP*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+};
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *trans_spec);
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data);
+struct usnic_vnic_res_chunk
+*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type type);
+static inline
+struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
+}
+#endif /* USNIC_IB_QP_GRP_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
new file mode 100644
index 00000000000..27dc67c1689
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_vnic.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_log.h"
+
+static ssize_t usnic_ib_show_fw_ver(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
+}
+
+static ssize_t usnic_ib_show_board(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ unsigned short subsystem_device_id;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ subsystem_device_id = us_ibdev->pdev->subsystem_device;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+}
+
+/*
+ * Report the configuration for this PF
+ */
+static ssize_t
+usnic_ib_show_config(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ char *ptr;
+ unsigned left;
+ unsigned n;
+ enum usnic_vnic_res_type res_type;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ /* Buffer space limit is 1 page */
+ ptr = buf;
+ left = PAGE_SIZE;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ char *busname;
+
+ /*
+ * bus name seems to come with annoying prefix.
+ * Remove it if it is predictable
+ */
+ busname = us_ibdev->pdev->bus->name;
+ if (strncmp(busname, "PCI Bus ", 8) == 0)
+ busname += 8;
+
+ n = scnprintf(ptr, left,
+ "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
+ us_ibdev->ib_dev.name,
+ busname,
+ PCI_SLOT(us_ibdev->pdev->devfn),
+ PCI_FUNC(us_ibdev->pdev->devfn),
+ netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac,
+ atomic_read(&us_ibdev->vf_cnt.refcount));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ if (us_ibdev->vf_res_cnt[res_type] == 0)
+ continue;
+ n = scnprintf(ptr, left, " %d %s%s",
+ us_ibdev->vf_res_cnt[res_type],
+ usnic_vnic_res_type_to_str(res_type),
+ (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
+ "," : "");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else {
+ n = scnprintf(ptr, left, "%s: no VFs\n",
+ us_ibdev->ib_dev.name);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return ptr - buf;
+}
+
+static ssize_t
+usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ netdev_name(us_ibdev->netdev));
+}
+
+static ssize_t
+usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ atomic_read(&us_ibdev->vf_cnt.refcount));
+}
+
+static ssize_t
+usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int qp_per_vf;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+
+ return scnprintf(buf, PAGE_SIZE,
+ "%d\n", qp_per_vf);
+}
+
+static ssize_t
+usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+}
+
+static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
+static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
+static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
+static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
+static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
+static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
+
+static struct device_attribute *usnic_class_attributes[] = {
+ &dev_attr_fw_ver,
+ &dev_attr_board_id,
+ &dev_attr_config,
+ &dev_attr_iface,
+ &dev_attr_max_vf,
+ &dev_attr_qp_per_vf,
+ &dev_attr_cq_per_vf,
+};
+
+/*
+ * Definitions for supporting QPN entries in sysfs
+ */
+struct qpn_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
+};
+
+static ssize_t
+usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct qpn_attribute *qpn_attr;
+
+ qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj);
+ qpn_attr = container_of(attr, struct qpn_attribute, attr);
+
+ return qpn_attr->show(qp_grp, buf);
+}
+
+static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
+ .show = usnic_ib_qpn_attr_show
+};
+
+#define QPN_ATTR_RO(NAME) \
+struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
+
+static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+}
+
+static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ int i, j, n;
+ int left;
+ char *ptr;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *vnic_res;
+
+ left = PAGE_SIZE;
+ ptr = buf;
+
+ n = scnprintf(ptr, left,
+ "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ res_chunk = qp_grp->res_chunk_list[i];
+ for (j = 0; j < res_chunk->cnt; j++) {
+ vnic_res = res_chunk->res[j];
+ n = scnprintf(ptr, left, "%s[%d] ",
+ usnic_vnic_res_type_to_str(vnic_res->type),
+ vnic_res->vnic_idx);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ }
+
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ return ptr - buf;
+}
+
+static QPN_ATTR_RO(context);
+static QPN_ATTR_RO(summary);
+
+static struct attribute *usnic_ib_qpn_default_attrs[] = {
+ &qpn_attr_context.attr,
+ &qpn_attr_summary.attr,
+ NULL
+};
+
+static struct kobj_type usnic_ib_qpn_type = {
+ .sysfs_ops = &usnic_ib_qpn_sysfs_ops,
+ .default_attrs = usnic_ib_qpn_default_attrs
+};
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ int err;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ err = device_create_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ if (err) {
+ usnic_err("Failed to create device file %d for %s eith err %d",
+ i, us_ibdev->ib_dev.name, err);
+ return -EINVAL;
+ }
+ }
+
+ /* create kernel object for looking at individual QPs */
+ kobject_get(&us_ibdev->ib_dev.dev.kobj);
+ us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
+ &us_ibdev->ib_dev.dev.kobj);
+ if (us_ibdev->qpn_kobj == NULL) {
+ kobject_put(&us_ibdev->ib_dev.dev.kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ device_remove_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ }
+
+ kobject_put(us_ibdev->qpn_kobj);
+}
+
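+/*
+ * qpn_add takes an extra reference on the parent "qpn" kobject, paired
+ * with the kobject_put() in qpn_remove, so the directory cannot go away
+ * while a QP entry still lives under it.
+ */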
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int err;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
+ kobject_get(us_ibdev->qpn_kobj),
+ "%d", qp_grp->grp_id);
+ if (err) {
+ kobject_put(us_ibdev->qpn_kobj);
+ return;
+ }
+}
+
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ kobject_put(&qp_grp->kobj);
+ kobject_put(us_ibdev->qpn_kobj);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
new file mode 100644
index 00000000000..0d09b493cd0
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_SYSFS_H_
+#define USNIC_IB_SYSFS_H_
+
+#include "usnic_ib.h"
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+
+#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
new file mode 100644
index 00000000000..53bd6a2d9cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_ib.h"
+#include "usnic_common_util.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_transport.h"
+
+#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
+
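+/* Note: only the first character of fw_ver_str is packed into the u64. */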
+static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
+{
+ *fw_ver = (u64) *fw_ver_str;
+}
+
+static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_create_qp_resp resp;
+ struct pci_dev *pdev;
+ struct vnic_dev_bar *bar;
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ int i, err;
+
+ memset(&resp, 0, sizeof(resp));
+
+ us_ibdev = qp_grp->vf->pf;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (!pdev) {
+ usnic_err("Failed to get pdev of qp_grp %d\n",
+ qp_grp->grp_id);
+ return -EFAULT;
+ }
+
+ bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
+ if (!bar) {
+ usnic_err("Failed to get bar0 of qp_grp %d vf %s",
+ qp_grp->grp_id, pci_name(pdev));
+ return -EFAULT;
+ }
+
+ resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
+ resp.bar_bus_addr = bar->bus_addr;
+ resp.bar_len = bar->len;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
+ resp.rq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.rq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
+ resp.wq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.wq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
+ resp.cq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.cq_idx[i] = chunk->res[i]->vnic_idx;
+
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ resp.transport = default_flow->trans_type;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp*
+find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
+ struct usnic_ib_pd *pd,
+ struct usnic_transport_spec *trans_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ struct usnic_ib_vf *vf;
+ struct usnic_vnic *vnic;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct device *dev, **dev_list;
+ int i, found = 0;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ if (list_empty(&us_ibdev->vf_dev_list)) {
+ usnic_info("No vfs to allocate\n");
+ return NULL;
+ }
+
+ if (usnic_ib_share_vf) {
+		/* Try to find resources on a used VF that is already in this PD */
+ dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
+ for (i = 0; dev_list[i]; i++) {
+ dev = dev_list[i];
+ vf = pci_get_drvdata(to_pci_dev(dev));
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (!usnic_vnic_check_room(vnic, res_spec)) {
+ usnic_dbg("Found used vnic %s from %s\n",
+ us_ibdev->ib_dev.name,
+ pci_name(usnic_vnic_get_pdev(
+ vnic)));
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+
+ }
+ usnic_uiom_free_dev_list(dev_list);
+ }
+
+ if (!found) {
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+ }
+ }
+
+ if (!found) {
+ usnic_info("No free qp grp found on %s\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
+ }
+
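+	/*
+	 * The chosen VF's lock is still held at this point;
+	 * usnic_ib_qp_grp_create() asserts it, and it is dropped once the
+	 * group has been created (or creation has failed).
+	 */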
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
+ trans_spec);
+ spin_unlock(&vf->lock);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ usnic_err("Failed to allocate qp_grp\n");
+ return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
+ }
+
+ return qp_grp;
+}
+
+static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_vf *vf = qp_grp->vf;
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+
+ spin_lock(&vf->lock);
+ usnic_ib_qp_grp_destroy(qp_grp);
+ spin_unlock(&vf->lock);
+}
+
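+/*
+ * Map the ethtool link speed (in Mbps) onto the closest IB width/speed
+ * product, e.g. 10 Gbps is reported as 1X at FDR10.
+ */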
+static void eth_speed_to_ib_speed(int speed, u8 *active_speed,
+ u8 *active_width)
+{
+ if (speed <= 10000) {
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_FDR10;
+ } else if (speed <= 20000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_DDR;
+ } else if (speed <= 30000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ } else if (speed <= 40000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR10;
+ } else {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ }
+}
+
+static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
+{
+ if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
+ cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Start of ib callback functions */
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ union ib_gid gid;
+ struct ethtool_drvinfo info;
+ struct ethtool_cmd cmd;
+ int qp_per_vf;
+
+ usnic_dbg("\n");
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid.raw[0]);
+ memcpy(&props->sys_image_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
+ props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
+ props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
+ props->vendor_id = PCI_VENDOR_ID_CISCO;
+ props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
+ props->hw_ver = us_ibdev->pdev->subsystem_device;
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+ props->max_qp = qp_per_vf *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->max_pd = USNIC_UIOM_MAX_PD_CNT;
+ props->max_mr = USNIC_UIOM_MAX_MR_CNT;
+ props->local_ca_ack_delay = 0;
+ props->max_pkeys = 0;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_qp_rd_atom = 0;
+ props->max_qp_init_rd_atom = 0;
+ props->max_res_rd_atom = 0;
+ props->max_srq = 0;
+ props->max_srq_wr = 0;
+ props->max_srq_sge = 0;
+ props->max_fast_reg_page_list_len = 0;
+ props->max_mcast_grp = 0;
+ props->max_mcast_qp_attach = 0;
+ props->max_total_mcast_qp_attach = 0;
+ props->max_map_per_fmr = 0;
+	/*
+	 * Owned by userspace:
+	 * max_qp_wr, max_sge, max_sge_rd, max_cqe
+	 */
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ struct ethtool_cmd cmd;
+
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+
+ props->lid = 0;
+ props->lmc = 1;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+
+ if (!us_ibdev->ufdev->link_up) {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+ } else if (!us_ibdev->ufdev->inaddr) {
+ props->state = IB_PORT_INIT;
+ props->phys_state = 4;
+ } else {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ props->port_cap_flags = 0;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->bad_pkey_cntr = 0;
+ props->qkey_viol_cntr = 0;
+ eth_speed_to_ib_speed(cmd.speed, &props->active_speed,
+ &props->active_width);
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
+ /* Userspace will adjust for hdrs */
+ props->max_msg_sz = us_ibdev->ufdev->mtu;
+ props->max_vl_num = 1;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ int err;
+
+ usnic_dbg("\n");
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ usnic_dbg("\n");
+ qp_attr->qp_state = qp_grp->state;
+ qp_attr->cur_qp_state = qp_grp->state;
+
+ switch (qp_grp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ qp_attr->qkey = 0;
+ break;
+ default:
+ usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ mutex_unlock(&vf->pf->usdev_lock);
+ return 0;
+
+err_out:
+ mutex_unlock(&vf->pf->usdev_lock);
+ return err;
+}
+
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ if (index > 1)
+ return -EINVAL;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid->raw[0]);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 1)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_pd *pd;
+ void *umem_pd;
+
+ usnic_dbg("\n");
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
+ if (IS_ERR_OR_NULL(umem_pd)) {
+ kfree(pd);
+ return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+ }
+
+ usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
+ pd, context, ibdev->name);
+ return &pd->ibpd;
+}
+
+int usnic_ib_dealloc_pd(struct ib_pd *pd)
+{
+ usnic_info("freeing domain 0x%p\n", pd);
+
+ usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+ kfree(pd);
+ return 0;
+}
+
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int err;
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_ucontext *ucontext;
+ int cq_cnt;
+ struct usnic_vnic_res_spec res_spec;
+ struct usnic_ib_create_qp_cmd cmd;
+ struct usnic_transport_spec trans_spec;
+
+ usnic_dbg("\n");
+
+ ucontext = to_uucontext(pd->uobject->context);
+ us_ibdev = to_usdev(pd->device);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (err) {
+ usnic_err("%s: cannot copy udata for create_qp\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = create_qp_validate_user_data(cmd);
+ if (err) {
+ usnic_err("%s: Failed to validate user data\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_UD) {
+ usnic_err("%s asked to make a non-UD QP: %d\n",
+ us_ibdev->ib_dev.name, init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ trans_spec = cmd.spec;
+ mutex_lock(&us_ibdev->usdev_lock);
+ cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
+ res_spec = min_transport_spec[trans_spec.trans_type];
+ usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
+ qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
+ &trans_spec,
+ &res_spec);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+ goto out_release_mutex;
+ }
+
+ err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
+ if (err) {
+ err = -EBUSY;
+ goto out_release_qp_grp;
+ }
+
+ qp_grp->ctx = ucontext;
+ list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
+ usnic_ib_log_vf(qp_grp->vf);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return &qp_grp->ibqp;
+
+out_release_qp_grp:
+ qp_grp_destroy(qp_grp);
+out_release_mutex:
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
+ usnic_err("Failed to move qp grp %u to reset\n",
+ qp_grp->grp_id);
+ }
+
+ list_del(&qp_grp->link);
+ qp_grp_destroy(qp_grp);
+ mutex_unlock(&vf->pf->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int status;
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(ibqp);
+
+	/* TODO: support all state transitions */
+ mutex_lock(&qp_grp->vf->pf->usdev_lock);
+ if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
+ } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
+ } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ } else {
+ usnic_err("Unexpected combination mask: %u state: %u\n",
+ attr_mask & IB_QP_STATE, attr->qp_state);
+ status = -EINVAL;
+ }
+
+ mutex_unlock(&qp_grp->vf->pf->usdev_lock);
+ return status;
+}
+
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ib_cq *cq;
+
+ usnic_dbg("\n");
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+		return ERR_PTR(-ENOMEM);
+
+ return cq;
+}
+
+int usnic_ib_destroy_cq(struct ib_cq *cq)
+{
+ usnic_dbg("\n");
+ kfree(cq);
+ return 0;
+}
+
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_mr *mr;
+ int err;
+
+ usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
+ virt_addr, length);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (IS_ERR_OR_NULL(mr))
+ return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+
+ mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
+ access_flags, 0);
+ if (IS_ERR_OR_NULL(mr->umem)) {
+ err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
+ goto err_free;
+ }
+
+ mr->ibmr.lkey = mr->ibmr.rkey = 0;
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct usnic_ib_mr *mr = to_umr(ibmr);
+
+ usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
+
+ usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+ kfree(mr);
+ return 0;
+}
+
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_ucontext *context;
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&context->qp_grp_list);
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_add_tail(&context->link, &us_ibdev->ctx_list);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return &context->ibucontext;
+}
+
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ BUG_ON(!list_empty(&context->qp_grp_list));
+ list_del(&context->link);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ kfree(context);
+ return 0;
+}
+
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ struct usnic_ib_ucontext *uctx = to_ucontext(context);
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ struct vnic_dev_bar *bar;
+ dma_addr_t bus_addr;
+ unsigned int len;
+ unsigned int vfid;
+
+ usnic_dbg("\n");
+
+ us_ibdev = to_usdev(context->device);
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vfid = vma->vm_pgoff;
+ usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
+ vma->vm_pgoff, PAGE_SHIFT, vfid);
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
+ vf = qp_grp->vf;
+ if (usnic_vnic_get_index(vf->vnic) == vfid) {
+ bar = usnic_vnic_get_bar(vf->vnic, 0);
+ if ((vma->vm_end - vma->vm_start) != bar->len) {
+ usnic_err("Bar0 Len %lu - Request map %lu\n",
+ bar->len,
+ vma->vm_end - vma->vm_start);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return -EINVAL;
+ }
+ bus_addr = bar->bus_addr;
+ len = bar->len;
+ usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
+ &bus_addr, bar->vaddr, bar->len);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ bus_addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+ }
+
+ mutex_unlock(&us_ibdev->usdev_lock);
+ usnic_err("No VF %u found\n", vfid);
+ return -EINVAL;
+}
+
+/* In ib callbacks section - Start of stub funcs */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-EPERM);
+}
+
+int usnic_ib_destroy_ah(struct ib_ah *ah)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+/* In ib callbacks section - End of stub funcs */
+/* End of ib callbacks section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
new file mode 100644
index 00000000000..bb864f5aed7
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_VERBS_H_
+#define USNIC_IB_VERBS_H_
+
+#include "usnic_ib.h"
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props);
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey);
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd);
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int usnic_ib_destroy_qp(struct ib_qp *qp);
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq);
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int usnic_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr);
+int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags);
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
+#endif /* !USNIC_IB_VERBS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h
new file mode 100644
index 00000000000..75777a66c68
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_log.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_LOG_H_
+#define USNIC_LOG_H_
+
+#include "usnic.h"
+
+extern unsigned int usnic_log_lvl;
+
+#define USNIC_LOG_LVL_NONE (0)
+#define USNIC_LOG_LVL_ERR (1)
+#define USNIC_LOG_LVL_INFO (2)
+#define USNIC_LOG_LVL_DBG (3)
+
+#define usnic_printk(lvl, args...) \
+ do { \
+ printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
+ __LINE__); \
+ printk(args); \
+ } while (0)
+
+#define usnic_dbg(args...) \
+ do { \
+ if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_info(args...) \
+do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_err(args...) \
+ do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
+ usnic_printk(KERN_ERR, args); \
+ } \
+ } while (0)
+#endif /* !USNIC_LOG_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
new file mode 100644
index 00000000000..ddef6f77a78
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bitmap.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/inet_sock.h>
+
+#include "usnic_transport.h"
+#include "usnic_log.h"
+
+/* ROCE */
+static unsigned long *roce_bitmap;
+static u16 roce_next_port = 1;
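+/* One bit per possible 16-bit port number: (1 << 16) / 8 = 8K of bitmap. */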
+#define ROCE_BITMAP_SZ ((1 << (8 /* CHAR_BIT */ * sizeof(u16))) / 8 /* CHAR_BIT */)
+static DEFINE_SPINLOCK(roce_bitmap_lock);
+
+const char *usnic_transport_to_str(enum usnic_transport_type type)
+{
+ switch (type) {
+ case USNIC_TRANSPORT_UNKNOWN:
+ return "Unknown";
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ return "roce custom";
+ case USNIC_TRANSPORT_IPV4_UDP:
+ return "IPv4 UDP";
+ case USNIC_TRANSPORT_MAX:
+ return "Max?";
+ default:
+ return "Not known";
+ }
+}
+
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock)
+{
+ int err;
+ uint32_t addr;
+ uint16_t port;
+ int proto;
+
+ memset(buf, 0, buf_sz);
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
+ if (err)
+ return 0;
+
+ return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
+ proto, &addr, port);
+}
+
+/*
+ * Reserve a port number. If 0 is specified, we will try to pick one
+ * starting at roce_next_port; roce_next_port will take on the values
+ * 1..4096.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ port_num = bitmap_find_next_zero_area(roce_bitmap,
+ ROCE_BITMAP_SZ,
+ roce_next_port /* start */,
+ 1 /* nr */,
+ 0 /* align */);
+ roce_next_port = (port_num & 4095) + 1;
+ } else if (test_bit(port_num, roce_bitmap)) {
+ usnic_err("Failed to allocate port for %s\n",
+ usnic_transport_to_str(type));
+ spin_unlock(&roce_bitmap_lock);
+ goto out_fail;
+ }
+ bitmap_set(roce_bitmap, port_num, 1);
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Failed to allocate port - transport %s unsupported\n",
+ usnic_transport_to_str(type));
+ goto out_fail;
+ }
+
+ usnic_dbg("Allocating port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+ return port_num;
+
+out_fail:
+ return 0;
+}
+
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ usnic_err("Unreserved unvalid port num 0 for %s\n",
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+
+ if (!test_bit(port_num, roce_bitmap)) {
+ usnic_err("Unreserving invalid %hu for %s\n",
+ port_num,
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+ bitmap_clear(roce_bitmap, port_num, 1);
+ usnic_dbg("Freeing port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+out_roce_custom:
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
+ }
+}
+
+struct socket *usnic_transport_get_socket(int sock_fd)
+{
+ struct socket *sock;
+ int err;
+ char buf[25];
+
+ /* sockfd_lookup will internally do a fget */
+ sock = sockfd_lookup(sock_fd, &err);
+ if (!sock) {
+ usnic_err("Unable to lookup socket for fd %d with err %d\n",
+ sock_fd, err);
+ return ERR_PTR(-ENOENT);
+ }
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Get sock %s\n", buf);
+
+ return sock;
+}
+
+void usnic_transport_put_socket(struct socket *sock)
+{
+ char buf[100];
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Put sock %s\n", buf);
+ sockfd_put(sock);
+}
+
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port)
+{
+ int len;
+ int err;
+ struct sockaddr_in sock_addr;
+
+ err = sock->ops->getname(sock,
+ (struct sockaddr *)&sock_addr,
+ &len, 0);
+ if (err)
+ return err;
+
+ if (sock_addr.sin_family != AF_INET)
+ return -EINVAL;
+
+ if (proto)
+ *proto = sock->sk->sk_protocol;
+ if (port)
+ *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port);
+ if (addr)
+ *addr = ntohl(((struct sockaddr_in *)
+ &sock_addr)->sin_addr.s_addr);
+
+ return 0;
+}
+
+int usnic_transport_init(void)
+{
+ roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
+ if (!roce_bitmap) {
+ usnic_err("Failed to allocate bit map");
+ return -ENOMEM;
+ }
+
+ /* Do not ever allocate bit 0, hence set it here */
+ bitmap_set(roce_bitmap, 0, 1);
+ return 0;
+}
+
+void usnic_transport_fini(void)
+{
+ kfree(roce_bitmap);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h
new file mode 100644
index 00000000000..7e5dc6d9f46
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_TRANSPORT_H_
+#define USNIC_TRANSPORT_H_
+
+#include "usnic_abi.h"
+
+const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
+/*
+ * Returns number of bytes written, excluding null terminator. If
+ * nothing was written, the function returns 0.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock);
+/*
+ * Reserve a port. If "port_num" is set, then the function will try
+ * to reserve that particular port.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
+/*
+ * Does an fget on the socket referred to by sock_fd and returns the socket.
+ * Socket will not be destroyed before usnic_transport_put_socket has
+ * been called.
+ */
+struct socket *usnic_transport_get_socket(int sock_fd);
+void usnic_transport_put_socket(struct socket *sock);
+/*
+ * Call usnic_transport_get_socket before calling *_sock_get_addr
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port);
+int usnic_transport_init(void);
+void usnic_transport_fini(void);
+#endif /* !USNIC_TRANSPORT_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
new file mode 100644
index 00000000000..801a1d6937e
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_uiom_interval_tree.h"
+
+static struct workqueue_struct *usnic_uiom_wq;
+
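+/*
+ * Number of scatterlist entries that fit in one page alongside the
+ * usnic_uiom_chunk header.
+ */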
+#define USNIC_UIOM_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\
+ ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
+ (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
+
+static void usnic_uiom_reg_account(struct work_struct *work)
+{
+ struct usnic_uiom_reg *umem = container_of(work,
+ struct usnic_uiom_reg, work);
+
+ down_write(&umem->mm->mmap_sem);
+ umem->mm->locked_vm -= umem->diff;
+ up_write(&umem->mm->mmap_sem);
+ mmput(umem->mm);
+ kfree(umem);
+}
+
+static int usnic_uiom_dma_fault(struct iommu_domain *domain,
+ struct device *dev,
+ unsigned long iova, int flags,
+ void *token)
+{
+ usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+ dev_name(dev),
+ domain, iova, flags);
+ return -ENOSYS;
+}
+
+static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
+{
+ struct usnic_uiom_chunk *chunk, *tmp;
+ struct page *page;
+ struct scatterlist *sg;
+ int i;
+ dma_addr_t pa;
+
+ list_for_each_entry_safe(chunk, tmp, chunk_list, list) {
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ page = sg_page(sg);
+ pa = sg_phys(sg);
+ if (dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
+ usnic_dbg("pa: %pa\n", &pa);
+ }
+ kfree(chunk);
+ }
+}
+
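+/*
+ * Pin the user pages backing [addr, addr + size), charging them against
+ * the process's RLIMIT_MEMLOCK, and collect them into page-sized chunks
+ * of scatterlist entries on chunk_list. Returns 0 on success or a
+ * negative errno; on failure, any pages already pinned are released.
+ */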
+static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
+ int dmasync, struct list_head *chunk_list)
+{
+ struct page **page_list;
+ struct scatterlist *sg;
+ struct usnic_uiom_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+ int flags;
+ dma_addr_t pa;
+ DEFINE_DMA_ATTRS(attrs);
+
+ if (dmasync)
+ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ INIT_LIST_HEAD(chunk_list);
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ flags = IOMMU_READ | IOMMU_CACHE;
+ flags |= (writable) ? IOMMU_WRITE : 0;
+ cur_base = addr & PAGE_MASK;
+ ret = 0;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE / sizeof(struct page *)),
+ 1, !writable, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ npages -= ret;
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof(*chunk) +
+ sizeof(struct scatterlist) *
+ min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ sg_set_page(sg, page_list[i + off],
+ PAGE_SIZE, 0);
+ pa = sg_phys(sg);
+ usnic_dbg("va: 0x%lx pa: %pa\n",
+ cur_base + i*PAGE_SIZE, &pa);
+ }
+ cur_base += chunk->nents * PAGE_SIZE;
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0)
+ usnic_uiom_put_pages(chunk_list, 0);
+ else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+ return ret;
+}
+
+static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ unsigned long va, size;
+
+ list_for_each_entry_safe(interval, tmp, intervals, link) {
+ va = interval->start << PAGE_SHIFT;
+ size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
+ while (size > 0) {
+ /* Workaround for RH 970401 */
+ usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE);
+ iommu_unmap(pd->domain, va, PAGE_SIZE);
+ va += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ }
+}
+
+static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
+ struct usnic_uiom_reg *uiomr,
+ int dirty)
+{
+ int npages;
+ unsigned long vpn_start, vpn_last;
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int writable = 0;
+ LIST_HEAD(rm_intervals);
+
+ npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+ vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ spin_lock(&pd->lock);
+ usnic_uiom_remove_interval(&pd->rb_root, vpn_start,
+ vpn_last, &rm_intervals);
+ usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
+
+ list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
+ if (interval->flags & IOMMU_WRITE)
+ writable = 1;
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
+ spin_unlock(&pd->lock);
+}
+
+static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_reg *uiomr)
+{
+ int i, err;
+ size_t size;
+ struct usnic_uiom_chunk *chunk;
+ struct usnic_uiom_interval_node *interval_node;
+ dma_addr_t pa;
+ dma_addr_t pa_start = 0;
+ dma_addr_t pa_end = 0;
+ long int va_start = -EINVAL;
+ struct usnic_uiom_pd *pd = uiomr->pd;
+ long int va = uiomr->va & PAGE_MASK;
+ int flags = IOMMU_READ | IOMMU_CACHE;
+
+ flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
+ chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
+ list);
+ list_for_each_entry(interval_node, intervals, link) {
+iter_chunk:
+ for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
+ pa = sg_phys(&chunk->page_list[i]);
+ if ((va >> PAGE_SHIFT) < interval_node->start)
+ continue;
+
+ if ((va >> PAGE_SHIFT) == interval_node->start) {
+ /* First page of the interval */
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ WARN_ON(va_start == -EINVAL);
+
+ if ((pa_end + PAGE_SIZE != pa) &&
+ (pa != pa_start)) {
+ /* PAs are not contiguous */
+ size = pa_end - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ if ((va >> PAGE_SHIFT) == interval_node->last) {
+ /* Last page of the interval */
+ size = pa - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ break;
+ }
+
+ if (pa != pa_start)
+ pa_end += PAGE_SIZE;
+ }
+
+ if (i == chunk->nents) {
+ /*
+ * Hit last entry of the chunk,
+ * hence advance to next chunk
+ */
+ chunk = list_first_entry(&chunk->list,
+ struct usnic_uiom_chunk,
+ list);
+ goto iter_chunk;
+ }
+ }
+
+ return 0;
+
+err_out:
+ usnic_uiom_unmap_sorted_intervals(intervals, pd);
+ return err;
+}
+
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int writable, int dmasync)
+{
+ struct usnic_uiom_reg *uiomr;
+ unsigned long va_base, vpn_start, vpn_last;
+ unsigned long npages;
+ int offset, err;
+ LIST_HEAD(sorted_diff_intervals);
+
+ /*
+ * The Intel IOMMU map operation throws an error if a translation
+ * entry is changed from read to write. This module may not unmap
+ * and then remap the entry after fixing the permissions because
+ * that would open up a small window where hw DMA could page fault.
+ * Hence, make all entries writable.
+ */
+ writable = 1;
+
+ va_base = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
+ vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
+ if (!uiomr)
+ return ERR_PTR(-ENOMEM);
+
+ uiomr->va = va_base;
+ uiomr->offset = offset;
+ uiomr->length = size;
+ uiomr->writable = writable;
+ uiomr->pd = pd;
+
+ err = usnic_uiom_get_pages(addr, size, writable, dmasync,
+ &uiomr->chunk_list);
+ if (err) {
+ usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_free_uiomr;
+ }
+
+ spin_lock(&pd->lock);
+ err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0,
+ IOMMU_WRITE,
+ &pd->rb_root,
+ &sorted_diff_intervals);
+ if (err) {
+ usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_pages;
+ }
+
+ err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
+ if (err) {
+ usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_intervals;
+ }
+
+ err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0);
+ if (err) {
+ usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_unmap_intervals;
+ }
+
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+ spin_unlock(&pd->lock);
+
+ return uiomr;
+
+out_unmap_intervals:
+ usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
+out_put_intervals:
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+out_put_pages:
+ usnic_uiom_put_pages(&uiomr->chunk_list, 0);
+ spin_unlock(&pd->lock);
+out_free_uiomr:
+ kfree(uiomr);
+ return ERR_PTR(err);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+{
+ struct mm_struct *mm;
+ unsigned long diff;
+
+ __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ kfree(uiomr);
+ return;
+ }
+
+ diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
+ */
+ if (closing) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
+ uiomr->mm = mm;
+ uiomr->diff = diff;
+
+ queue_work(usnic_uiom_wq, &uiomr->work);
+ return;
+ }
+ } else
+ down_write(&mm->mmap_sem);
+
+ current->mm->locked_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ kfree(uiomr);
+}
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+{
+ struct usnic_uiom_pd *pd;
+ void *domain;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
+ if (IS_ERR_OR_NULL(domain)) {
+ usnic_err("Failed to allocate IOMMU domain with err %ld\n",
+ PTR_ERR(pd->domain));
+ kfree(pd);
+ return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM);
+ }
+
+ iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
+
+ spin_lock_init(&pd->lock);
+ INIT_LIST_HEAD(&pd->devs);
+
+ return pd;
+}
+
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
+{
+ iommu_domain_free(pd->domain);
+ kfree(pd);
+}
+
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int err;
+
+ uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC);
+ if (!uiom_dev)
+ return -ENOMEM;
+ uiom_dev->dev = dev;
+
+ err = iommu_attach_device(pd->domain, dev);
+ if (err)
+ goto out_free_dev;
+
+ if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+ usnic_err("IOMMU of %s does not support cache coherency\n",
+ dev_name(dev));
+ err = -EINVAL;
+ goto out_detach_device;
+ }
+
+ spin_lock(&pd->lock);
+ list_add_tail(&uiom_dev->link, &pd->devs);
+ pd->dev_cnt++;
+ spin_unlock(&pd->lock);
+
+ return 0;
+
+out_detach_device:
+ iommu_detach_device(pd->domain, dev);
+out_free_dev:
+ kfree(uiom_dev);
+ return err;
+}
+
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int found = 0;
+
+ spin_lock(&pd->lock);
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ if (uiom_dev->dev == dev) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ usnic_err("Unable to free dev %s - not found\n",
+ dev_name(dev));
+ spin_unlock(&pd->lock);
+ return;
+ }
+
+ list_del(&uiom_dev->link);
+ pd->dev_cnt--;
+ spin_unlock(&pd->lock);
+
+ iommu_detach_device(pd->domain, dev);
+}
+
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ struct device **devs;
+ int i = 0;
+
+ spin_lock(&pd->lock);
+ devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
+ if (!devs) {
+ devs = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ devs[i++] = uiom_dev->dev;
+ }
+out:
+ spin_unlock(&pd->lock);
+ return devs;
+}
+
+void usnic_uiom_free_dev_list(struct device **devs)
+{
+ kfree(devs);
+}
+
+int usnic_uiom_init(char *drv_name)
+{
+ if (!iommu_present(&pci_bus_type)) {
+ usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
+ return -EPERM;
+ }
+
+ usnic_uiom_wq = create_workqueue(drv_name);
+ if (!usnic_uiom_wq) {
+ usnic_err("Unable to alloc wq for drv %s\n", drv_name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_uiom_fini(void)
+{
+ flush_workqueue(usnic_uiom_wq);
+ destroy_workqueue(usnic_uiom_wq);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
new file mode 100644
index 00000000000..70440996e8f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_H_
+#define USNIC_UIOM_H_
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include "usnic_uiom_interval_tree.h"
+
+#define USNIC_UIOM_READ (1)
+#define USNIC_UIOM_WRITE (2)
+
+#define USNIC_UIOM_MAX_PD_CNT (1000)
+#define USNIC_UIOM_MAX_MR_CNT (1000000)
+#define USNIC_UIOM_MAX_MR_SIZE (~0UL)
+#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE)
+
+struct usnic_uiom_dev {
+ struct device *dev;
+ struct list_head link;
+};
+
+struct usnic_uiom_pd {
+ struct iommu_domain *domain;
+ spinlock_t lock;
+ struct rb_root rb_root;
+ struct list_head devs;
+ int dev_cnt;
+};
+
+struct usnic_uiom_reg {
+ struct usnic_uiom_pd *pd;
+ unsigned long va;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+struct usnic_uiom_chunk {
+ struct list_head list;
+ int nents;
+ struct scatterlist page_list[0];
+};
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
+ struct device *dev);
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
+void usnic_uiom_free_dev_list(struct device **devs);
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int access, int dmasync);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+int usnic_uiom_init(char *drv_name);
+void usnic_uiom_fini(void);
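+
+/*
+ * Minimal usage sketch (illustrative only; error handling is elided and
+ * 'dev', 'addr' and 'size' are assumed to come from the caller):
+ *
+ *   struct usnic_uiom_pd *pd = usnic_uiom_alloc_pd();
+ *   struct usnic_uiom_reg *reg;
+ *
+ *   usnic_uiom_attach_dev_to_pd(pd, dev);
+ *   reg = usnic_uiom_reg_get(pd, addr, size, USNIC_UIOM_WRITE, 0);
+ *   ... device DMA to/from the registered region ...
+ *   usnic_uiom_reg_release(reg, 0);
+ *   usnic_uiom_detach_dev_from_pd(pd, dev);
+ *   usnic_uiom_dealloc_pd(pd);
+ */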
+#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
new file mode 100644
index 00000000000..3a4288e0fba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+#include <linux/interval_tree_generic.h>
+#include "usnic_uiom_interval_tree.h"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \
+ do { \
+ node = usnic_uiom_interval_node_alloc(start, \
+ end, ref_cnt, flags); \
+ if (!node) { \
+ err = -ENOMEM; \
+ goto err_out; \
+ } \
+ } while (0)
+
+#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list))
+
+#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \
+ err_out, list) \
+ do { \
+ MAKE_NODE(node, start, end, \
+ ref_cnt, flags, err, \
+ err_out); \
+ MARK_FOR_ADD(node, list); \
+ } while (0)
+
+#define FLAGS_EQUAL(flags1, flags2, mask) \
+ (((flags1) & (mask)) == ((flags2) & (mask)))
+
+static struct usnic_uiom_interval_node*
+usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
+ int flags)
+{
+ struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval),
+ GFP_ATOMIC);
+ if (!interval)
+ return NULL;
+
+ interval->start = start;
+ interval->last = last;
+ interval->flags = flags;
+ interval->ref_cnt = ref_cnt;
+
+ return interval;
+}
+
+static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct usnic_uiom_interval_node *node_a, *node_b;
+
+ node_a = list_entry(a, struct usnic_uiom_interval_node, link);
+ node_b = list_entry(b, struct usnic_uiom_interval_node, link);
+
+ /* Compare rather than subtract to avoid long-to-int truncation */
+ if (node_a->start < node_b->start)
+ return -1;
+ else if (node_a->start > node_b->start)
+ return 1;
+
+ return 0;
+}
+
+static void
+find_intervals_intersection_sorted(struct rb_root *root, unsigned long start,
+ unsigned long last,
+ struct list_head *list)
+{
+ struct usnic_uiom_interval_node *node;
+
+ INIT_LIST_HEAD(list);
+
+ for (node = usnic_uiom_interval_tree_iter_first(root, start, last);
+ node;
+ node = usnic_uiom_interval_tree_iter_next(node, start, last))
+ list_add_tail(&node->link, list);
+
+ list_sort(NULL, list, interval_cmp);
+}
+
+int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
+ int flags, int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int err = 0;
+ long int pivot = start;
+ LIST_HEAD(intersection_set);
+
+ INIT_LIST_HEAD(diff_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ if (pivot < interval->start) {
+ MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
+ 1, flags, err, err_out,
+ diff_set);
+ pivot = interval->start;
+ }
+
+ /*
+ * Invariant: Set [start, pivot] is either in diff_set or root,
+ * but not in both.
+ */
+
+ if (pivot > interval->last) {
+ continue;
+ } else if (pivot <= interval->last &&
+ FLAGS_EQUAL(interval->flags, flags,
+ flag_mask)) {
+ pivot = interval->last + 1;
+ }
+ }
+
+ if (pivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
+ diff_set);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, diff_set, link) {
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ return err;
+}
+
+void usnic_uiom_put_interval_set(struct list_head *intervals)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ list_for_each_entry_safe(interval, tmp, intervals, link)
+ kfree(interval);
+}
+
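+/*
+ * Worked example of the insert semantics below: inserting [2,7] with
+ * flags F into a tree that already holds [5,10] (ref_cnt 1, flags G)
+ * replaces that node with three nodes: [2,4] (ref_cnt 1, flags F),
+ * [5,7] (ref_cnt 2, flags F|G) and [8,10] (ref_cnt 1, flags G).
+ */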
+int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start,
+ unsigned long last, int flags)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ unsigned long istart, ilast;
+ int iref_cnt, iflags;
+ unsigned long lpivot = start;
+ int err = 0;
+ LIST_HEAD(to_add);
+ LIST_HEAD(intersection_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ /*
+ * Invariant: lpivot is the left edge of the next interval to be
+ * inserted
+ */
+ istart = interval->start;
+ ilast = interval->last;
+ iref_cnt = interval->ref_cnt;
+ iflags = interval->flags;
+
+ if (istart < lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else if (istart > lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
+ err, err_out, &to_add);
+ lpivot = istart;
+ } else {
+ lpivot = istart;
+ }
+
+ if (ilast > last) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ }
+
+ lpivot = ilast + 1;
+ }
+
+ if (lpivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
+ &to_add);
+
+ list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
+ usnic_uiom_interval_tree_remove(interval, root);
+ kfree(interval);
+ }
+
+ list_for_each_entry(interval, &to_add, link)
+ usnic_uiom_interval_tree_insert(interval, root);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, &to_add, link)
+ kfree(interval);
+
+ return err;
+}
+
+void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start,
+ unsigned long last, struct list_head *removed)
+{
+ struct usnic_uiom_interval_node *interval;
+
+ for (interval = usnic_uiom_interval_tree_iter_first(root, start, last);
+ interval;
+ interval = usnic_uiom_interval_tree_iter_next(interval,
+ start,
+ last)) {
+ if (--interval->ref_cnt == 0)
+ list_add_tail(&interval->link, removed);
+ }
+
+ list_for_each_entry(interval, removed, link)
+ usnic_uiom_interval_tree_remove(interval, root);
+}
+
+INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
+ unsigned long, __subtree_last,
+ START, LAST, , usnic_uiom_interval_tree)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
new file mode 100644
index 00000000000..d4f752e258f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_INTERVAL_TREE_H_
+#define USNIC_UIOM_INTERVAL_TREE_H_
+
+#include <linux/rbtree.h>
+
+struct usnic_uiom_interval_node {
+ struct rb_node rb;
+ struct list_head link;
+ unsigned long start;
+ unsigned long last;
+ unsigned long __subtree_last;
+ unsigned int ref_cnt;
+ int flags;
+};
+
+extern void
+usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern void
+usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_first(struct rb_root *root,
+ unsigned long start,
+ unsigned long last);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
+ unsigned long start, unsigned long last);
+/*
+ * Inserts {start...last} into {root}. If there are overlaps,
+ * nodes will be broken up and merged
+ */
+int usnic_uiom_insert_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ int flags);
+/*
+ * Removes {start...last} from {root}. The removed nodes are returned in
+ * 'removed'. The caller is responsible for freeing the memory of the
+ * nodes in 'removed'.
+ */
+void usnic_uiom_remove_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ struct list_head *removed);
+/*
+ * Returns {start...last} - {root} (the relative complement of {root}
+ * in {start...last}) in diff_set, sorted in ascending order
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start,
+ unsigned long last, int flags,
+ int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set);
+/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
+void usnic_uiom_put_interval_set(struct list_head *intervals);
+#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
new file mode 100644
index 00000000000..656b88c39ed
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "usnic_ib.h"
+#include "vnic_resource.h"
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+
+struct usnic_vnic {
+ struct vnic_dev *vdev;
+ struct vnic_dev_bar bar[PCI_NUM_RESOURCES];
+ struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX];
+ spinlock_t res_lock;
+};
+
+static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ vnic_res_type,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ vnic_res_type,
+ static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return RES_TYPE_MAX;
+
+ return usnic_vnic_type_2_vnic_type[res_type];
+}
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ desc,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ desc,
+ static const char * const usnic_vnic_res_type_desc[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return "unknown";
+
+ return usnic_vnic_res_type_desc[res_type];
+}
+
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
+{
+ return pci_name(usnic_vnic_get_pdev(vnic));
+}
+
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
+ int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int))
+{
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_vnic_res *res;
+ struct vnic_dev_bar *bar0;
+ int i, j, offset;
+
+ offset = 0;
+ bar0 = usnic_vnic_get_bar(vnic, 0);
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
+ usnic_vnic_get_index(vnic),
+ &bar0->bus_addr,
+ bar0->vaddr, bar0->len);
+ if (printtitle)
+ offset += printtitle(hdr_obj, buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|RES\t|CTRL_PIN\t\t|IN_USE\t");
+ if (printcols)
+ offset += printcols(buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+
+ spin_lock(&vnic->res_lock);
+ for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) {
+ chunk = &vnic->chunks[i];
+ for (j = 0; j < chunk->cnt; j++) {
+ res = chunk->res[j];
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|%s[%u]\t|0x%p\t|%u\t",
+ usnic_vnic_res_type_to_str(res->type),
+ res->vnic_idx, res->ctrl, !!res->owner);
+ if (printrow) {
+ offset += printrow(res->owner, buf + offset,
+ buf_sz - offset);
+ }
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "\n");
+ }
+ }
+ spin_unlock(&vnic->res_lock);
+ return offset;
+}
+
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt)
+{
+ int i;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ if (spec->resources[i].type == trgt_type) {
+ spec->resources[i].cnt = cnt;
+ return;
+ }
+ }
+
+ WARN_ON(1);
+}
+
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int found, i, j;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ found = 0;
+
+ for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
+ if (res_spec->resources[i].type !=
+ min_spec->resources[i].type)
+ continue;
+ found = 1;
+ if (min_spec->resources[i].cnt >
+ res_spec->resources[i].cnt)
+ return -EINVAL;
+ break;
+ }
+
+ if (!found)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+ int i;
+ int offset = 0;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "Res: %s Cnt: %d ",
+ usnic_vnic_res_type_to_str(res_type),
+ res_cnt);
+ }
+
+ return offset;
+}
+
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int i;
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ if (res_type == USNIC_VNIC_RES_TYPE_EOL)
+ break;
+
+ if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].cnt;
+}
+
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].free_cnt;
+}
+
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+ int cnt, void *owner)
+{
+ struct usnic_vnic_res_chunk *src, *ret;
+ struct usnic_vnic_res *res;
+ int i;
+
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ return ERR_PTR(-EINVAL);
+
+ ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret) {
+ usnic_err("Failed to allocate chunk for %s - Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
+ }
+
+ spin_unlock(&vnic->res_lock);
+ ret->type = type;
+ ret->vnic = vnic;
+ WARN_ON(ret->cnt != cnt);
+
+ return ret;
+}
+
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+ struct usnic_vnic_res *res;
+ int i;
+ struct usnic_vnic *vnic = chunk->vnic;
+
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
+
+ kfree(chunk->res);
+ kfree(chunk);
+}
+
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+ return usnic_vnic_get_pdev(vnic)->devfn - 1;
+}
+
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ struct usnic_vnic_res_chunk *chunk)
+{
+ int cnt, err, i;
+ struct usnic_vnic_res *res;
+
+ cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+ if (cnt < 1)
+ return -EINVAL;
+
+ chunk->cnt = chunk->free_cnt = cnt;
+ chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
+ if (!chunk->res)
+ return -ENOMEM;
+
+ for (i = 0; i < cnt; i++) {
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ res->type = type;
+ res->vnic_idx = i;
+ res->vnic = vnic;
+ res->ctrl = vnic_dev_get_res(vnic->vdev,
+ _to_vnic_res_type(type), i);
+ chunk->res[i] = res;
+ }
+
+ chunk->vnic = vnic;
+ return 0;
+fail:
+ for (i--; i >= 0; i--)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+ return err;
+}
+
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+ int i;
+ for (i = 0; i < chunk->cnt; i++)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+}
+
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+ struct usnic_vnic *vnic)
+{
+ enum usnic_vnic_res_type res_type;
+ int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ vnic->bar[i].len = pci_resource_len(pdev, i);
+ vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+ if (!vnic->bar[i].vaddr) {
+ usnic_err("Cannot memory-map BAR %d, aborting\n",
+ i);
+ err = -ENODEV;
+ goto out_clean_bar;
+ }
+ vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+ }
+
+ vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+ ARRAY_SIZE(vnic->bar));
+ if (!vnic->vdev) {
+ usnic_err("Failed to register device %s\n",
+ pci_name(pdev));
+ err = -EINVAL;
+ goto out_clean_bar;
+ }
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+ err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+ &vnic->chunks[res_type]);
+ if (err) {
+ usnic_err("Failed to alloc res %s with err %d\n",
+ usnic_vnic_res_type_to_str(res_type),
+ err);
+ goto out_clean_chunks;
+ }
+ }
+
+ return 0;
+
+out_clean_chunks:
+ for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+ vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ if (!vnic->bar[i].vaddr)
+ break;
+
+ iounmap(vnic->bar[i].vaddr);
+ }
+
+ return err;
+}
+
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+ return vnic_dev_get_pdev(vnic->vdev);
+}
+
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num)
+{
+ return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL;
+}
+
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+ int i;
+ struct pci_dev *pdev;
+ enum usnic_vnic_res_type res_type;
+
+ pdev = usnic_vnic_get_pdev(vnic);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+ vnic_dev_unregister(vnic->vdev);
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ iounmap(vnic->bar[i].vaddr);
+ }
+}
+
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+ struct usnic_vnic *vnic;
+ int err = 0;
+
+ if (!pci_is_enabled(pdev)) {
+ usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+ if (!vnic) {
+ usnic_err("Failed to alloc vnic for %s - out of memory\n",
+ pci_name(pdev));
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&vnic->res_lock);
+
+ err = usnic_vnic_discover_resources(pdev, vnic);
+ if (err) {
+ usnic_err("Failed to discover %s resources with err %d\n",
+ pci_name(pdev), err);
+ goto out_free_vnic;
+ }
+
+ usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
+
+ return vnic;
+
+out_free_vnic:
+ kfree(vnic);
+
+ return ERR_PTR(err);
+}
+
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+ usnic_vnic_release_resources(vnic);
+ kfree(vnic);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h
new file mode 100644
index 00000000000..14d931a8829
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */
+#define USNIC_VNIC_RES_TYPES \
+ DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+ DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+ DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+ DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+ DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+ DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+ USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
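+
+/*
+ * With the definitions above, USNIC_VNIC_RES_TYPES expands the enum to:
+ *
+ *   USNIC_VNIC_RES_TYPE_EOL = 0,
+ *   USNIC_VNIC_RES_TYPE_WQ,
+ *   USNIC_VNIC_RES_TYPE_RQ,
+ *   USNIC_VNIC_RES_TYPE_CQ,
+ *   USNIC_VNIC_RES_TYPE_INTR,
+ *   USNIC_VNIC_RES_TYPE_MAX,
+ *
+ * keeping each usnic resource type in sync with its vnic_res_type and
+ * description string wherever the X-macro is re-expanded.
+ */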
+
+struct usnic_vnic_res {
+ enum usnic_vnic_res_type type;
+ unsigned int vnic_idx;
+ struct usnic_vnic *vnic;
+ void __iomem *ctrl;
+ void *owner;
+};
+
+struct usnic_vnic_res_chunk {
+ enum usnic_vnic_res_type type;
+ int cnt;
+ int free_cnt;
+ struct usnic_vnic_res **res;
+ struct usnic_vnic *vnic;
+};
+
+struct usnic_vnic_res_desc {
+ enum usnic_vnic_res_type type;
+ uint16_t cnt;
+};
+
+struct usnic_vnic_res_spec {
+ struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ int cnt,
+ void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
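+/*
+ * Minimal usage sketch (illustrative only; error handling is elided and
+ * 'pdev' and 'owner' are assumed to come from the caller):
+ *
+ *   struct usnic_vnic *vnic = usnic_vnic_alloc(pdev);
+ *   struct usnic_vnic_res_chunk *chunk;
+ *
+ *   chunk = usnic_vnic_get_resources(vnic, USNIC_VNIC_RES_TYPE_RQ,
+ *                                    1, owner);
+ *   ... program the resource through chunk->res[0]->ctrl ...
+ *   usnic_vnic_put_resources(chunk);
+ *   usnic_vnic_free(vnic);
+ */
+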
+#endif /*!USNIC_VNIC_H_*/